AOMedia AV1 Codec
svc_encoder_rtc
1/*
2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved.
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12// This is an example demonstrating how to implement a multi-layer AOM
13// encoding scheme for RTC video applications.
14
15#include <assert.h>
16#include <inttypes.h>
17#include <limits.h>
18#include <math.h>
19#include <stdio.h>
20#include <stdlib.h>
21#include <string.h>
22
23#include <memory>
24
25#include "config/aom_config.h"
26
27#if CONFIG_AV1_DECODER
28#include "aom/aom_decoder.h"
29#endif
30#include "aom/aom_encoder.h"
31#include "aom/aom_image.h"
32#include "aom/aom_integer.h"
33#include "aom/aomcx.h"
34#include "aom_dsp/bitwriter_buffer.h"
35#include "aom_ports/aom_timer.h"
36#include "av1/ratectrl_rtc.h"
37#include "common/args.h"
38#include "common/tools_common.h"
39#include "common/video_writer.h"
40#include "examples/encoder_util.h"
41#include "examples/multilayer_metadata.h"
42
// Size, in bytes, of the AppInput::options character buffer.
#define OPTION_BUFFER_SIZE 1024
// Upper bound on the number of spatial layers, and therefore on the number of
// input contexts stored in AppInput.
#define MAX_NUM_SPATIAL_LAYERS 4

// NOTE(review): no use of GOOD_QUALITY is visible in this part of the file.
#define GOOD_QUALITY 0
47
48typedef struct {
49 const char *output_filename;
50 char options[OPTION_BUFFER_SIZE];
51 struct AvxInputContext input_ctx[MAX_NUM_SPATIAL_LAYERS];
52 int speed;
53 int aq_mode;
54 int layering_mode;
55 int output_obu;
56 int decode;
57 int tune_content;
58 int show_psnr;
59 bool use_external_rc;
60 bool scale_factors_explicitly_set;
61 const char *multilayer_metadata_file;
62} AppInput;
63
// Kinds of per-layer option lists. The numeric values index the
// option_min_values / option_max_values tables, so they must stay dense and
// start at zero.
typedef enum {
  QUANTIZER = 0,
  BITRATE = 1,
  SCALE_FACTOR = 2,
  AUTO_ALT_REF = 3,
  ALL_OPTION_TYPES = 4  // Number of option kinds; not an option itself.
} LAYER_OPTION_TYPE;
71
// NOTE(review): the consumers of these constants are outside the visible part
// of the file; the names suggest per-segment feature selectors.
enum {
  kSkip = 0,
  kDeltaQ = 1,
  kDeltaLF = 2,
  kReference = 3
};
73
74static const arg_def_t outputfile =
75 ARG_DEF("o", "output", 1, "Output filename");
76static const arg_def_t frames_arg =
77 ARG_DEF("f", "frames", 1, "Number of frames to encode");
78static const arg_def_t threads_arg =
79 ARG_DEF("th", "threads", 1, "Number of threads to use");
80static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "Source width");
81static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "Source height");
82static const arg_def_t timebase_arg =
83 ARG_DEF("t", "timebase", 1, "Timebase (num/den)");
84static const arg_def_t bitrate_arg = ARG_DEF(
85 "b", "target-bitrate", 1, "Encoding bitrate, in kilobits per second");
86static const arg_def_t spatial_layers_arg =
87 ARG_DEF("sl", "spatial-layers", 1, "Number of spatial SVC layers");
88static const arg_def_t temporal_layers_arg =
89 ARG_DEF("tl", "temporal-layers", 1, "Number of temporal SVC layers");
90static const arg_def_t layering_mode_arg =
91 ARG_DEF("lm", "layering-mode", 1, "Temporal layering scheme.");
92static const arg_def_t kf_dist_arg =
93 ARG_DEF("k", "kf-dist", 1, "Number of frames between keyframes");
94static const arg_def_t scale_factors_arg =
95 ARG_DEF("r", "scale-factors", 1, "Scale factors (lowest to highest layer)");
96static const arg_def_t min_q_arg =
97 ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
98static const arg_def_t max_q_arg =
99 ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
100static const arg_def_t speed_arg =
101 ARG_DEF("sp", "speed", 1, "Speed configuration");
102static const arg_def_t aqmode_arg =
103 ARG_DEF("aq", "aqmode", 1, "AQ mode off/on");
104static const arg_def_t bitrates_arg =
105 ARG_DEF("bl", "bitrates", 1,
106 "Bitrates[spatial_layer * num_temporal_layer + temporal_layer]");
107static const arg_def_t dropframe_thresh_arg =
108 ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
109static const arg_def_t error_resilient_arg =
110 ARG_DEF(NULL, "error-resilient", 1, "Error resilient flag");
111static const arg_def_t output_obu_arg =
112 ARG_DEF(NULL, "output-obu", 1,
113 "Write OBUs when set to 1. Otherwise write IVF files.");
114static const arg_def_t test_decode_arg =
115 ARG_DEF(NULL, "test-decode", 1,
116 "Attempt to test decoding the output when set to 1. Default is 1.");
117static const arg_def_t psnr_arg =
118 ARG_DEF(NULL, "psnr", -1, "Show PSNR in status line.");
119static const arg_def_t ext_rc_arg =
120 ARG_DEF(NULL, "use-ext-rc", 0, "Use external rate control.");
121static const struct arg_enum_list tune_content_enum[] = {
122 { "default", AOM_CONTENT_DEFAULT },
123 { "screen", AOM_CONTENT_SCREEN },
124 { "film", AOM_CONTENT_FILM },
125 { NULL, 0 }
126};
127static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
128 NULL, "tune-content", 1, "Tune content type", tune_content_enum);
129#if CONFIG_CWG_E050
130static const arg_def_t multilayer_metadata_file_arg =
131 ARG_DEF("ml", "multilayer_metadata_file", 1,
132 "Experimental: path to multilayer metadata file");
133#endif
134
135#if CONFIG_AV1_HIGHBITDEPTH
136static const struct arg_enum_list bitdepth_enum[] = { { "8", AOM_BITS_8 },
137 { "10", AOM_BITS_10 },
138 { NULL, 0 } };
139
140static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
141 "d", "bit-depth", 1, "Bit depth for codec 8 or 10. ", bitdepth_enum);
142#endif // CONFIG_AV1_HIGHBITDEPTH
143
144static const arg_def_t *svc_args[] = {
145 &frames_arg,
146 &outputfile,
147 &width_arg,
148 &height_arg,
149 &timebase_arg,
150 &bitrate_arg,
151 &spatial_layers_arg,
152 &kf_dist_arg,
153 &scale_factors_arg,
154 &min_q_arg,
155 &max_q_arg,
156 &temporal_layers_arg,
157 &layering_mode_arg,
158 &threads_arg,
159 &aqmode_arg,
160#if CONFIG_AV1_HIGHBITDEPTH
161 &bitdepth_arg,
162#endif
163 &speed_arg,
164 &bitrates_arg,
165 &dropframe_thresh_arg,
166 &error_resilient_arg,
167 &output_obu_arg,
168 &test_decode_arg,
169 &tune_content_arg,
170 &psnr_arg,
171#if CONFIG_CWG_E050
172 &multilayer_metadata_file_arg,
173#endif
174 NULL,
175};
176
// Clears the whole object |Dest| to zero bytes. |Dest| must be an lvalue whose
// full size is known to sizeof (an array or struct, not a pointer to one).
#define zero(Dest) memset(&(Dest), 0, sizeof(Dest))

// Program name printed in the usage message.
static const char *exec_name;
180
181void usage_exit(void) {
182 fprintf(stderr,
183 "Usage: %s <options> input_filename [input_filename ...] -o "
184 "output_filename\n",
185 exec_name);
186 fprintf(stderr, "Options:\n");
187 arg_show_usage(stderr, svc_args);
188 fprintf(
189 stderr,
190 "Input files must be y4m or yuv.\n"
191 "If multiple input files are specified, they correspond to spatial "
192 "layers, and there should be as many as there are spatial layers.\n"
193 "All input files must have the same width, height, frame rate and number "
194 "of frames.\n"
195 "If only one file is specified, it is used for all spatial layers.\n");
196 exit(EXIT_FAILURE);
197}
198
// Returns 1 when the four bytes in |detect| are the Y4M signature "YUV4",
// 0 otherwise.
static int file_is_y4m(const char detect[4]) {
  static const char kY4mMagic[4] = { 'Y', 'U', 'V', '4' };
  if (memcmp(detect, kY4mMagic, sizeof(kY4mMagic)) != 0) {
    return 0;
  }
  return 1;
}
202
// Returns 1 when the four bytes in |detect| are the IVF signature "DKIF",
// 0 otherwise.
static int fourcc_is_ivf(const char detect[4]) {
  static const char kIvfMagic[4] = { 'D', 'K', 'I', 'F' };
  return memcmp(detect, kIvfMagic, sizeof(kIvfMagic)) == 0 ? 1 : 0;
}
209
210static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX,
211 1 };
212
213static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 };
214
215static void open_input_file(struct AvxInputContext *input,
217 /* Parse certain options from the input file, if possible */
218 input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb")
219 : set_binary_mode(stdin);
220
221 if (!input->file) fatal("Failed to open input file");
222
223 if (!fseeko(input->file, 0, SEEK_END)) {
224 /* Input file is seekable. Figure out how long it is, so we can get
225 * progress info.
226 */
227 input->length = ftello(input->file);
228 rewind(input->file);
229 }
230
231 /* Default to 1:1 pixel aspect ratio. */
232 input->pixel_aspect_ratio.numerator = 1;
233 input->pixel_aspect_ratio.denominator = 1;
234
235 /* For RAW input sources, these bytes will applied on the first frame
236 * in read_frame().
237 */
238 input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
239 input->detect.position = 0;
240
241 if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
242 if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp,
243 input->only_i420) >= 0) {
244 input->file_type = FILE_TYPE_Y4M;
245 input->width = input->y4m.pic_w;
246 input->height = input->y4m.pic_h;
247 input->pixel_aspect_ratio.numerator = input->y4m.par_n;
248 input->pixel_aspect_ratio.denominator = input->y4m.par_d;
249 input->framerate.numerator = input->y4m.fps_n;
250 input->framerate.denominator = input->y4m.fps_d;
251 input->fmt = input->y4m.aom_fmt;
252 input->bit_depth = static_cast<aom_bit_depth_t>(input->y4m.bit_depth);
253 } else {
254 fatal("Unsupported Y4M stream.");
255 }
256 } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
257 fatal("IVF is not supported as input.");
258 } else {
259 input->file_type = FILE_TYPE_RAW;
260 }
261}
262
263static aom_codec_err_t extract_option(LAYER_OPTION_TYPE type, char *input,
264 int *value0, int *value1) {
265 if (type == SCALE_FACTOR) {
266 *value0 = (int)strtol(input, &input, 10);
267 if (*input++ != '/') return AOM_CODEC_INVALID_PARAM;
268 *value1 = (int)strtol(input, &input, 10);
269
270 if (*value0 < option_min_values[SCALE_FACTOR] ||
271 *value1 < option_min_values[SCALE_FACTOR] ||
272 *value0 > option_max_values[SCALE_FACTOR] ||
273 *value1 > option_max_values[SCALE_FACTOR] ||
274 *value0 > *value1) // num shouldn't be greater than den
276 } else {
277 *value0 = atoi(input);
278 if (*value0 < option_min_values[type] || *value0 > option_max_values[type])
280 }
281 return AOM_CODEC_OK;
282}
283
284static aom_codec_err_t parse_layer_options_from_string(
285 aom_svc_params_t *svc_params, LAYER_OPTION_TYPE type, const char *input,
286 int *option0, int *option1) {
288 char *input_string;
289 char *token;
290 const char *delim = ",";
291 int num_layers = svc_params->number_spatial_layers;
292 int i = 0;
293
294 if (type == BITRATE)
295 num_layers =
296 svc_params->number_spatial_layers * svc_params->number_temporal_layers;
297
298 if (input == NULL || option0 == NULL ||
299 (option1 == NULL && type == SCALE_FACTOR))
301
302 const size_t input_length = strlen(input);
303 input_string = reinterpret_cast<char *>(malloc(input_length + 1));
304 if (input_string == NULL) return AOM_CODEC_MEM_ERROR;
305 memcpy(input_string, input, input_length + 1);
306 token = strtok(input_string, delim); // NOLINT
307 for (i = 0; i < num_layers; ++i) {
308 if (token != NULL) {
309 res = extract_option(type, token, option0 + i, option1 + i);
310 if (res != AOM_CODEC_OK) break;
311 token = strtok(NULL, delim); // NOLINT
312 } else {
314 break;
315 }
316 }
317 free(input_string);
318 return res;
319}
320
321static void parse_command_line(int argc, const char **argv_,
322 AppInput *app_input,
323 aom_svc_params_t *svc_params,
324 aom_codec_enc_cfg_t *enc_cfg) {
325 struct arg arg;
326 char **argv = NULL;
327 char **argi = NULL;
328 char **argj = NULL;
329 char string_options[1024] = { 0 };
330
331 // Default settings
332 svc_params->number_spatial_layers = 1;
333 svc_params->number_temporal_layers = 1;
334 app_input->layering_mode = 0;
335 app_input->output_obu = 0;
336 app_input->decode = 1;
337 enc_cfg->g_threads = 1;
338 enc_cfg->rc_end_usage = AOM_CBR;
339
340 // process command line options
341 argv = argv_dup(argc - 1, argv_ + 1);
342 if (!argv) {
343 fprintf(stderr, "Error allocating argument list\n");
344 exit(EXIT_FAILURE);
345 }
346 for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
347 arg.argv_step = 1;
348
349 if (arg_match(&arg, &outputfile, argi)) {
350 app_input->output_filename = arg.val;
351 } else if (arg_match(&arg, &width_arg, argi)) {
352 enc_cfg->g_w = arg_parse_uint(&arg);
353 } else if (arg_match(&arg, &height_arg, argi)) {
354 enc_cfg->g_h = arg_parse_uint(&arg);
355 } else if (arg_match(&arg, &timebase_arg, argi)) {
356 enc_cfg->g_timebase = arg_parse_rational(&arg);
357 } else if (arg_match(&arg, &bitrate_arg, argi)) {
358 enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
359 } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
360 svc_params->number_spatial_layers = arg_parse_uint(&arg);
361 } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
362 svc_params->number_temporal_layers = arg_parse_uint(&arg);
363 } else if (arg_match(&arg, &speed_arg, argi)) {
364 app_input->speed = arg_parse_uint(&arg);
365 if (app_input->speed > 11) {
366 aom_tools_warn("Mapping speed %d to speed 11.\n", app_input->speed);
367 }
368 } else if (arg_match(&arg, &aqmode_arg, argi)) {
369 app_input->aq_mode = arg_parse_uint(&arg);
370 } else if (arg_match(&arg, &threads_arg, argi)) {
371 enc_cfg->g_threads = arg_parse_uint(&arg);
372 } else if (arg_match(&arg, &layering_mode_arg, argi)) {
373 app_input->layering_mode = arg_parse_int(&arg);
374 } else if (arg_match(&arg, &kf_dist_arg, argi)) {
375 enc_cfg->kf_min_dist = arg_parse_uint(&arg);
376 enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
377 } else if (arg_match(&arg, &scale_factors_arg, argi)) {
378 aom_codec_err_t res = parse_layer_options_from_string(
379 svc_params, SCALE_FACTOR, arg.val, svc_params->scaling_factor_num,
380 svc_params->scaling_factor_den);
381 app_input->scale_factors_explicitly_set = true;
382 if (res != AOM_CODEC_OK) {
383 die("Failed to parse scale factors: %s\n",
385 }
386 } else if (arg_match(&arg, &min_q_arg, argi)) {
387 enc_cfg->rc_min_quantizer = arg_parse_uint(&arg);
388 } else if (arg_match(&arg, &max_q_arg, argi)) {
389 enc_cfg->rc_max_quantizer = arg_parse_uint(&arg);
390#if CONFIG_AV1_HIGHBITDEPTH
391 } else if (arg_match(&arg, &bitdepth_arg, argi)) {
392 enc_cfg->g_bit_depth =
393 static_cast<aom_bit_depth_t>(arg_parse_enum_or_int(&arg));
394 switch (enc_cfg->g_bit_depth) {
395 case AOM_BITS_8:
396 enc_cfg->g_input_bit_depth = 8;
397 enc_cfg->g_profile = 0;
398 break;
399 case AOM_BITS_10:
400 enc_cfg->g_input_bit_depth = 10;
401 enc_cfg->g_profile = 0;
402 break;
403 default:
404 die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
405 }
406#endif // CONFIG_VP9_HIGHBITDEPTH
407 } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) {
408 enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg);
409 } else if (arg_match(&arg, &error_resilient_arg, argi)) {
410 enc_cfg->g_error_resilient = arg_parse_uint(&arg);
411 if (enc_cfg->g_error_resilient != 0 && enc_cfg->g_error_resilient != 1)
412 die("Invalid value for error resilient (0, 1): %d.",
413 enc_cfg->g_error_resilient);
414 } else if (arg_match(&arg, &output_obu_arg, argi)) {
415 app_input->output_obu = arg_parse_uint(&arg);
416 if (app_input->output_obu != 0 && app_input->output_obu != 1)
417 die("Invalid value for obu output flag (0, 1): %d.",
418 app_input->output_obu);
419 } else if (arg_match(&arg, &test_decode_arg, argi)) {
420 app_input->decode = arg_parse_uint(&arg);
421 if (app_input->decode != 0 && app_input->decode != 1)
422 die("Invalid value for test decode flag (0, 1): %d.",
423 app_input->decode);
424 } else if (arg_match(&arg, &tune_content_arg, argi)) {
425 app_input->tune_content = arg_parse_enum_or_int(&arg);
426 printf("tune content %d\n", app_input->tune_content);
427 } else if (arg_match(&arg, &psnr_arg, argi)) {
428 app_input->show_psnr = 1;
429 } else if (arg_match(&arg, &ext_rc_arg, argi)) {
430 app_input->use_external_rc = true;
431#if CONFIG_CWG_E050
432 } else if (arg_match(&arg, &multilayer_metadata_file_arg, argi)) {
433 app_input->multilayer_metadata_file = arg.val;
434#endif
435 } else {
436 ++argj;
437 }
438 }
439
440 // Total bitrate needs to be parsed after the number of layers.
441 for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
442 arg.argv_step = 1;
443 if (arg_match(&arg, &bitrates_arg, argi)) {
444 aom_codec_err_t res = parse_layer_options_from_string(
445 svc_params, BITRATE, arg.val, svc_params->layer_target_bitrate, NULL);
446 if (res != AOM_CODEC_OK) {
447 die("Failed to parse bitrates: %s\n", aom_codec_err_to_string(res));
448 }
449 } else {
450 ++argj;
451 }
452 }
453
454 // There will be a space in front of the string options
455 if (strlen(string_options) > 0)
456 strncpy(app_input->options, string_options, OPTION_BUFFER_SIZE);
457
458 // Check for unrecognized options
459 for (argi = argv; *argi; ++argi)
460 if (argi[0][0] == '-' && strlen(argi[0]) > 1)
461 die("Error: Unrecognized option %s\n", *argi);
462
463 if (argv[0] == NULL) {
464 usage_exit();
465 }
466
467 int input_count = 0;
468 while (argv[input_count] != NULL && input_count < MAX_NUM_SPATIAL_LAYERS) {
469 app_input->input_ctx[input_count].filename = argv[input_count];
470 ++input_count;
471 }
472 if (input_count > 1 && input_count != svc_params->number_spatial_layers) {
473 die("Error: Number of input files does not match number of spatial layers");
474 }
475 if (argv[input_count] != NULL) {
476 die("Error: Too many input files specified, there should be at most %d",
477 MAX_NUM_SPATIAL_LAYERS);
478 }
479
480 free(argv);
481
482 for (int i = 0; i < input_count; ++i) {
483 open_input_file(&app_input->input_ctx[i], AOM_CSP_UNKNOWN);
484 if (app_input->input_ctx[i].file_type == FILE_TYPE_Y4M) {
485 if (enc_cfg->g_w == 0 || enc_cfg->g_h == 0) {
486 // Override these settings with the info from Y4M file.
487 enc_cfg->g_w = app_input->input_ctx[i].width;
488 enc_cfg->g_h = app_input->input_ctx[i].height;
489 // g_timebase is the reciprocal of frame rate.
490 enc_cfg->g_timebase.num = app_input->input_ctx[i].framerate.denominator;
491 enc_cfg->g_timebase.den = app_input->input_ctx[i].framerate.numerator;
492 } else if (enc_cfg->g_w != app_input->input_ctx[i].width ||
493 enc_cfg->g_h != app_input->input_ctx[i].height ||
494 enc_cfg->g_timebase.num !=
495 app_input->input_ctx[i].framerate.denominator ||
496 enc_cfg->g_timebase.den !=
497 app_input->input_ctx[i].framerate.numerator) {
498 die("Error: Input file dimensions and/or frame rate mismatch");
499 }
500 }
501 }
502 if (enc_cfg->g_w == 0 || enc_cfg->g_h == 0) {
503 die("Error: Input file dimensions not set, use -w and -h");
504 }
505
506 if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
507 enc_cfg->g_h % 2)
508 die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
509
510 printf(
511 "Codec %s\n"
512 "layers: %d\n"
513 "width %u, height: %u\n"
514 "num: %d, den: %d, bitrate: %u\n"
515 "gop size: %u\n",
517 svc_params->number_spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
518 enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
519 enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
520}
521
// Number of temporal layers for each layering mode 0..11.
static const int mode_to_num_temporal_layers[12] = {
  1, 2, 3, 3,  // modes 0-3
  2, 1, 1, 3,  // modes 4-7
  3, 3, 3, 3,  // modes 8-11
};
// Number of spatial layers for each layering mode 0..11.
static const int mode_to_num_spatial_layers[12] = {
  1, 1, 1, 1,  // modes 0-3
  1, 2, 3, 2,  // modes 4-7
  3, 3, 3, 3,  // modes 8-11
};
528
529// For rate control encoding stats.
530struct RateControlMetrics {
531 // Number of input frames per layer.
532 int layer_input_frames[AOM_MAX_TS_LAYERS];
533 // Number of encoded non-key frames per layer.
534 int layer_enc_frames[AOM_MAX_TS_LAYERS];
535 // Framerate per layer layer (cumulative).
536 double layer_framerate[AOM_MAX_TS_LAYERS];
537 // Target average frame size per layer (per-frame-bandwidth per layer).
538 double layer_pfb[AOM_MAX_LAYERS];
539 // Actual average frame size per layer.
540 double layer_avg_frame_size[AOM_MAX_LAYERS];
541 // Average rate mismatch per layer (|target - actual| / target).
542 double layer_avg_rate_mismatch[AOM_MAX_LAYERS];
543 // Actual encoding bitrate per layer (cumulative across temporal layers).
544 double layer_encoding_bitrate[AOM_MAX_LAYERS];
545 // Average of the short-time encoder actual bitrate.
546 // TODO(marpan): Should we add these short-time stats for each layer?
547 double avg_st_encoding_bitrate;
548 // Variance of the short-time encoder actual bitrate.
549 double variance_st_encoding_bitrate;
550 // Window (number of frames) for computing short-timee encoding bitrate.
551 int window_size;
552 // Number of window measurements.
553 int window_count;
554 int layer_target_bitrate[AOM_MAX_LAYERS];
555};
556
// Number of reference buffer slots (length of the refresh[] flags).
static const int REF_FRAMES = 8;

// Number of inter reference types per frame (length of ref_idx[] and
// reference[]).
static const int INTER_REFS_PER_FRAME = 7;
560
// Reference frame types used by this example encoder. The values are indices
// into the ref_idx[] and reference[] arrays of the reference frame config.
enum {
  SVC_LAST_FRAME = 0,
  SVC_LAST2_FRAME = 1,
  SVC_LAST3_FRAME = 2,
  SVC_GOLDEN_FRAME = 3,
  SVC_BWDREF_FRAME = 4,
  SVC_ALTREF2_FRAME = 5,
  SVC_ALTREF_FRAME = 6
};
571
572static int read_frame(struct AvxInputContext *input_ctx, aom_image_t *img) {
573 FILE *f = input_ctx->file;
574 y4m_input *y4m = &input_ctx->y4m;
575 int shortread = 0;
576
577 if (input_ctx->file_type == FILE_TYPE_Y4M) {
578 if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0;
579 } else {
580 shortread = read_yuv_frame(input_ctx, img);
581 }
582
583 return !shortread;
584}
585
586static void close_input_file(struct AvxInputContext *input) {
587 fclose(input->file);
588 if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
589}
590
591// Note: these rate control metrics assume only 1 key frame in the
592// sequence (i.e., first frame only). So for temporal pattern# 7
593// (which has key frame for every frame on base layer), the metrics
594// computation will be off/wrong.
595// TODO(marpan): Update these metrics to account for multiple key frames
596// in the stream.
597static void set_rate_control_metrics(struct RateControlMetrics *rc,
598 double framerate, int ss_number_layers,
599 int ts_number_layers) {
600 int ts_rate_decimator[AOM_MAX_TS_LAYERS] = { 1 };
601 ts_rate_decimator[0] = 1;
602 if (ts_number_layers == 2) {
603 ts_rate_decimator[0] = 2;
604 ts_rate_decimator[1] = 1;
605 }
606 if (ts_number_layers == 3) {
607 ts_rate_decimator[0] = 4;
608 ts_rate_decimator[1] = 2;
609 ts_rate_decimator[2] = 1;
610 }
611 // Set the layer (cumulative) framerate and the target layer (non-cumulative)
612 // per-frame-bandwidth, for the rate control encoding stats below.
613 for (int sl = 0; sl < ss_number_layers; ++sl) {
614 int i = sl * ts_number_layers;
615 rc->layer_framerate[0] = framerate / ts_rate_decimator[0];
616 rc->layer_pfb[i] =
617 1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0];
618 for (int tl = 0; tl < ts_number_layers; ++tl) {
619 i = sl * ts_number_layers + tl;
620 if (tl > 0) {
621 rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl];
622 rc->layer_pfb[i] =
623 1000.0 *
624 (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
625 (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]);
626 }
627 rc->layer_input_frames[tl] = 0;
628 rc->layer_enc_frames[tl] = 0;
629 rc->layer_encoding_bitrate[i] = 0.0;
630 rc->layer_avg_frame_size[i] = 0.0;
631 rc->layer_avg_rate_mismatch[i] = 0.0;
632 }
633 }
634 rc->window_count = 0;
635 rc->window_size = 15;
636 rc->avg_st_encoding_bitrate = 0.0;
637 rc->variance_st_encoding_bitrate = 0.0;
638}
639
640static void printout_rate_control_summary(struct RateControlMetrics *rc,
641 int frame_cnt, int ss_number_layers,
642 int ts_number_layers) {
643 int tot_num_frames = 0;
644 double perc_fluctuation = 0.0;
645 printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
646 printf("Rate control layer stats for %d layer(s):\n\n", ts_number_layers);
647 for (int sl = 0; sl < ss_number_layers; ++sl) {
648 tot_num_frames = 0;
649 for (int tl = 0; tl < ts_number_layers; ++tl) {
650 int i = sl * ts_number_layers + tl;
651 const int num_dropped =
652 tl > 0 ? rc->layer_input_frames[tl] - rc->layer_enc_frames[tl]
653 : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1;
654 tot_num_frames += rc->layer_input_frames[tl];
655 rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] *
656 rc->layer_encoding_bitrate[i] /
657 tot_num_frames;
658 rc->layer_avg_frame_size[i] =
659 rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl];
660 rc->layer_avg_rate_mismatch[i] =
661 100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl];
662 printf("For layer#: %d %d \n", sl, tl);
663 printf("Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i],
664 rc->layer_encoding_bitrate[i]);
665 printf("Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i],
666 rc->layer_avg_frame_size[i]);
667 printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]);
668 printf(
669 "Number of input frames, encoded (non-key) frames, "
670 "and perc dropped frames: %d %d %f\n",
671 rc->layer_input_frames[tl], rc->layer_enc_frames[tl],
672 100.0 * num_dropped / rc->layer_input_frames[tl]);
673 printf("\n");
674 }
675 }
676 rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
677 rc->variance_st_encoding_bitrate =
678 rc->variance_st_encoding_bitrate / rc->window_count -
679 (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
680 perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
681 rc->avg_st_encoding_bitrate;
682 printf("Short-time stats, for window of %d frames:\n", rc->window_size);
683 printf("Average, rms-variance, and percent-fluct: %f %f %f\n",
684 rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
685 perc_fluctuation);
686 if (frame_cnt - 1 != tot_num_frames)
687 die("Error: Number of input frames not equal to output!\n");
688}
689
690// Layer pattern configuration.
691static void set_layer_pattern(
692 int layering_mode, int superframe_cnt, aom_svc_layer_id_t *layer_id,
693 aom_svc_ref_frame_config_t *ref_frame_config,
694 aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int *use_svc_control,
695 int spatial_layer_id, int is_key_frame, int ksvc_mode, int speed,
696 int *reference_updated, int test_roi_map) {
697 // Setting this flag to 1 enables simplex example of
698 // RPS (Reference Picture Selection) for 1 layer.
699 int use_rps_example = 0;
700 int i;
701 int enable_longterm_temporal_ref = 1;
702 int shift = (layering_mode == 8) ? 2 : 0;
703 int simulcast_mode = (layering_mode == 11);
704 *use_svc_control = 1;
705 layer_id->spatial_layer_id = spatial_layer_id;
706 int lag_index = 0;
707 int base_count = superframe_cnt >> 2;
708 ref_frame_comp_pred->use_comp_pred[0] = 0; // GOLDEN_LAST
709 ref_frame_comp_pred->use_comp_pred[1] = 0; // LAST2_LAST
710 ref_frame_comp_pred->use_comp_pred[2] = 0; // ALTREF_LAST
711 // Set the reference map buffer idx for the 7 references:
712 // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
713 // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
714 for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i;
715 for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->reference[i] = 0;
716 for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
717
718 if (ksvc_mode) {
719 // Same pattern as case 9, but the reference strucutre will be constrained
720 // below.
721 layering_mode = 9;
722 }
723 switch (layering_mode) {
724 case 0:
725 if (use_rps_example == 0) {
726 // 1-layer: update LAST on every frame, reference LAST.
727 layer_id->temporal_layer_id = 0;
728 layer_id->spatial_layer_id = 0;
729 ref_frame_config->refresh[0] = 1;
730 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
731 // Add additional reference (GOLDEN) if test_roi_map is set,
732 // to test reference frame feature on segment.
733 if (test_roi_map) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
734 } else {
735 // Pattern of 2 references (ALTREF and GOLDEN) trailing
736 // LAST by 4 and 8 frames, with some switching logic to
737 // sometimes only predict from the longer-term reference
738 //(golden here). This is simple example to test RPS
739 // (reference picture selection).
740 int last_idx = 0;
741 int last_idx_refresh = 0;
742 int gld_idx = 0;
743 int alt_ref_idx = 0;
744 int lag_alt = 4;
745 int lag_gld = 8;
746 layer_id->temporal_layer_id = 0;
747 layer_id->spatial_layer_id = 0;
748 int sh = 8; // slots 0 - 7.
749 // Moving index slot for last: 0 - (sh - 1)
750 if (superframe_cnt > 1) last_idx = (superframe_cnt - 1) % sh;
751 // Moving index for refresh of last: one ahead for next frame.
752 last_idx_refresh = superframe_cnt % sh;
753 // Moving index for gld_ref, lag behind current by lag_gld
754 if (superframe_cnt > lag_gld) gld_idx = (superframe_cnt - lag_gld) % sh;
755 // Moving index for alt_ref, lag behind LAST by lag_alt frames.
756 if (superframe_cnt > lag_alt)
757 alt_ref_idx = (superframe_cnt - lag_alt) % sh;
758 // Set the ref_idx.
759 // Default all references to slot for last.
760 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
761 ref_frame_config->ref_idx[i] = last_idx;
762 // Set the ref_idx for the relevant references.
763 ref_frame_config->ref_idx[SVC_LAST_FRAME] = last_idx;
764 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = last_idx_refresh;
765 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = gld_idx;
766 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = alt_ref_idx;
767 // Refresh this slot, which will become LAST on next frame.
768 ref_frame_config->refresh[last_idx_refresh] = 1;
769 // Reference LAST, ALTREF, and GOLDEN
770 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
771 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
772 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
773 // Switch to only GOLDEN every 300 frames.
774 if (superframe_cnt % 200 == 0 && superframe_cnt > 0) {
775 ref_frame_config->reference[SVC_LAST_FRAME] = 0;
776 ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
777 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
778 // Test if the long-term is LAST instead, this is just a renaming
779 // but its tests if encoder behaves the same, whether its
780 // LAST or GOLDEN.
781 if (superframe_cnt % 400 == 0 && superframe_cnt > 0) {
782 ref_frame_config->ref_idx[SVC_LAST_FRAME] = gld_idx;
783 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
784 ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
785 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
786 }
787 }
788 }
789 break;
790 case 1:
791 // 2-temporal layer.
792 // 1 3 5
793 // 0 2 4
794 // Keep golden fixed at slot 3.
795 base_count = superframe_cnt >> 1;
796 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
797 // Cyclically refresh slots 5, 6, 7, for lag alt ref.
798 lag_index = 5;
799 if (base_count > 0) {
800 lag_index = 5 + (base_count % 3);
801 if (superframe_cnt % 2 != 0) lag_index = 5 + ((base_count + 1) % 3);
802 }
803 // Set the altref slot to lag_index.
804 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
805 if (superframe_cnt % 2 == 0) {
806 layer_id->temporal_layer_id = 0;
807 // Update LAST on layer 0, reference LAST.
808 ref_frame_config->refresh[0] = 1;
809 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
810 // Refresh lag_index slot, needed for lagging golen.
811 ref_frame_config->refresh[lag_index] = 1;
812 // Refresh GOLDEN every x base layer frames.
813 if (base_count % 32 == 0) ref_frame_config->refresh[3] = 1;
814 } else {
815 layer_id->temporal_layer_id = 1;
816 // No updates on layer 1, reference LAST (TL0).
817 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
818 }
819 // Always reference golden and altref on TL0.
820 if (layer_id->temporal_layer_id == 0) {
821 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
822 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
823 }
824 break;
825 case 2:
826 // 3-temporal layer:
827 // 1 3 5 7
828 // 2 6
829 // 0 4 8
830 if (superframe_cnt % 4 == 0) {
831 // Base layer.
832 layer_id->temporal_layer_id = 0;
833 // Update LAST on layer 0, reference LAST.
834 ref_frame_config->refresh[0] = 1;
835 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
836 } else if ((superframe_cnt - 1) % 4 == 0) {
837 layer_id->temporal_layer_id = 2;
838 // First top layer: no updates, only reference LAST (TL0).
839 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
840 } else if ((superframe_cnt - 2) % 4 == 0) {
841 layer_id->temporal_layer_id = 1;
842 // Middle layer (TL1): update LAST2, only reference LAST (TL0).
843 ref_frame_config->refresh[1] = 1;
844 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
845 } else if ((superframe_cnt - 3) % 4 == 0) {
846 layer_id->temporal_layer_id = 2;
847 // Second top layer: no updates, only reference LAST.
848 // Set buffer idx for LAST to slot 1, since that was the slot
849 // updated in previous frame. So LAST is TL1 frame.
850 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
851 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
852 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
853 }
854 break;
855 case 3:
856 // 3 TL, same as above, except allow for predicting
857 // off 2 more references (GOLDEN and ALTREF), with
858 // GOLDEN updated periodically, and ALTREF lagging from
859 // LAST from ~4 frames. Both GOLDEN and ALTREF
860 // can only be updated on base temporal layer.
861
862 // Keep golden fixed at slot 3.
863 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
864 // Cyclically refresh slots 5, 6, 7, for lag altref.
865 lag_index = 5;
866 if (base_count > 0) {
867 lag_index = 5 + (base_count % 3);
868 if (superframe_cnt % 4 != 0) lag_index = 5 + ((base_count + 1) % 3);
869 }
870 // Set the altref slot to lag_index.
871 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
872 if (superframe_cnt % 4 == 0) {
873 // Base layer.
874 layer_id->temporal_layer_id = 0;
875 // Update LAST on layer 0, reference LAST.
876 ref_frame_config->refresh[0] = 1;
877 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
878 // Refresh GOLDEN every 10 base layer frames.
879 if (base_count % 10 == 0) ref_frame_config->refresh[3] = 1;
880 // Refresh lag_index slot, needed for lagging altref.
881 ref_frame_config->refresh[lag_index] = 1;
882 } else if ((superframe_cnt - 1) % 4 == 0) {
883 layer_id->temporal_layer_id = 2;
884 // First top layer: no updates, only reference LAST (TL0).
885 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
886 } else if ((superframe_cnt - 2) % 4 == 0) {
887 layer_id->temporal_layer_id = 1;
888 // Middle layer (TL1): update LAST2, only reference LAST (TL0).
889 ref_frame_config->refresh[1] = 1;
890 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
891 } else if ((superframe_cnt - 3) % 4 == 0) {
892 layer_id->temporal_layer_id = 2;
893 // Second top layer: no updates, only reference LAST.
894 // Set buffer idx for LAST to slot 1, since that was the slot
895 // updated in previous frame. So LAST is TL1 frame.
896 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
897 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
898 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
899 }
900 // Every frame can reference GOLDEN AND ALTREF.
901 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
902 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
903 // Allow for compound prediction for LAST-ALTREF and LAST-GOLDEN.
904 if (speed >= 7) {
905 ref_frame_comp_pred->use_comp_pred[2] = 1;
906 ref_frame_comp_pred->use_comp_pred[0] = 1;
907 }
908 break;
909 case 4:
910 // 3-temporal layer: but middle layer updates GF, so 2nd TL2 will
911 // only reference GF (not LAST). Other frames only reference LAST.
912 // 1 3 5 7
913 // 2 6
914 // 0 4 8
915 if (superframe_cnt % 4 == 0) {
916 // Base layer.
917 layer_id->temporal_layer_id = 0;
918 // Update LAST on layer 0, only reference LAST.
919 ref_frame_config->refresh[0] = 1;
920 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
921 } else if ((superframe_cnt - 1) % 4 == 0) {
922 layer_id->temporal_layer_id = 2;
923 // First top layer: no updates, only reference LAST (TL0).
924 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
925 } else if ((superframe_cnt - 2) % 4 == 0) {
926 layer_id->temporal_layer_id = 1;
927 // Middle layer (TL1): update GF, only reference LAST (TL0).
928 ref_frame_config->refresh[3] = 1;
929 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
930 } else if ((superframe_cnt - 3) % 4 == 0) {
931 layer_id->temporal_layer_id = 2;
932 // Second top layer: no updates, only reference GF.
933 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
934 }
935 break;
936
937 case 5:
938 /*
939 // 2 spatial layers, 1 temporal, without temporal prediction on SL1.
940 layer_id->temporal_layer_id = 0;
941 if (layer_id->spatial_layer_id == 0) {
942 // Reference LAST, update LAST.
943 ref_frame_config->refresh[0] = 1;
944 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
945 } else if (layer_id->spatial_layer_id == 1) {
946 // Reference LAST, which is SL0, and no refresh.
947 ref_frame_config->refresh[0] = 0;
948 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
949 }
950 break;
951 */
952 // 2 spatial layers, 1 temporal.
953 layer_id->temporal_layer_id = 0;
954 if (layer_id->spatial_layer_id == 0) {
955 // Reference LAST, update LAST.
956 ref_frame_config->refresh[0] = 1;
957 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
958 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 2;
959 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
960 } else if (layer_id->spatial_layer_id == 1) {
961 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
962 // and GOLDEN to slot 0. Update slot 1 (LAST).
963 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
964 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 0;
965 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 2;
966 ref_frame_config->refresh[1] = 1;
967 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
968 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
969 }
970 break;
971
972 case 6:
973 // 3 spatial layers, 1 temporal.
974 // Note for this case, we set the buffer idx for all references to be
975 // either LAST or GOLDEN, which are always valid references, since decoder
976 // will check if any of the 7 references is valid scale in
977 // valid_ref_frame_size().
978 layer_id->temporal_layer_id = 0;
979 if (layer_id->spatial_layer_id == 0) {
980 // Reference LAST, update LAST. Set all buffer_idx to 0.
981 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
982 ref_frame_config->ref_idx[i] = 0;
983 ref_frame_config->refresh[0] = 1;
984 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
985 } else if (layer_id->spatial_layer_id == 1) {
986 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
987 // and GOLDEN (and all other refs) to slot 0.
988 // Update slot 1 (LAST).
989 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
990 ref_frame_config->ref_idx[i] = 0;
991 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
992 ref_frame_config->refresh[1] = 1;
993 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
994 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
995 } else if (layer_id->spatial_layer_id == 2) {
996 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2
997 // and GOLDEN (and all other refs) to slot 1.
998 // Update slot 2 (LAST).
999 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1000 ref_frame_config->ref_idx[i] = 1;
1001 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1002 ref_frame_config->refresh[2] = 1;
1003 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1004 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
1005 // For 3 spatial layer case: allow for top spatial layer to use
1006 // additional temporal reference. Update every 10 frames.
1007 if (enable_longterm_temporal_ref) {
1008 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
1009 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
1010 if (base_count % 10 == 0)
1011 ref_frame_config->refresh[REF_FRAMES - 1] = 1;
1012 }
1013 }
1014 break;
1015 case 7:
1016 // 2 spatial and 3 temporal layer.
1017 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1018 if (superframe_cnt % 4 == 0) {
1019 // Base temporal layer
1020 layer_id->temporal_layer_id = 0;
1021 if (layer_id->spatial_layer_id == 0) {
1022 // Reference LAST, update LAST
1023 // Set all buffer_idx to 0
1024 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1025 ref_frame_config->ref_idx[i] = 0;
1026 ref_frame_config->refresh[0] = 1;
1027 } else if (layer_id->spatial_layer_id == 1) {
1028 // Reference LAST and GOLDEN.
1029 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1030 ref_frame_config->ref_idx[i] = 0;
1031 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1032 ref_frame_config->refresh[1] = 1;
1033 }
1034 } else if ((superframe_cnt - 1) % 4 == 0) {
1035 // First top temporal enhancement layer.
1036 layer_id->temporal_layer_id = 2;
1037 if (layer_id->spatial_layer_id == 0) {
1038 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1039 ref_frame_config->ref_idx[i] = 0;
1040 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1041 ref_frame_config->refresh[3] = 1;
1042 } else if (layer_id->spatial_layer_id == 1) {
1043 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1044 // GOLDEN (and all other refs) to slot 3.
1045 // No update.
1046 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1047 ref_frame_config->ref_idx[i] = 3;
1048 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1049 }
1050 } else if ((superframe_cnt - 2) % 4 == 0) {
1051 // Middle temporal enhancement layer.
1052 layer_id->temporal_layer_id = 1;
1053 if (layer_id->spatial_layer_id == 0) {
1054 // Reference LAST.
1055 // Set all buffer_idx to 0.
1056 // Set GOLDEN to slot 5 and update slot 5.
1057 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1058 ref_frame_config->ref_idx[i] = 0;
1059 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1060 ref_frame_config->refresh[5 - shift] = 1;
1061 } else if (layer_id->spatial_layer_id == 1) {
1062 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1063 // GOLDEN (and all other refs) to slot 5.
1064 // Set LAST3 to slot 6 and update slot 6.
1065 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1066 ref_frame_config->ref_idx[i] = 5 - shift;
1067 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1068 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1069 ref_frame_config->refresh[6 - shift] = 1;
1070 }
1071 } else if ((superframe_cnt - 3) % 4 == 0) {
1072 // Second top temporal enhancement layer.
1073 layer_id->temporal_layer_id = 2;
1074 if (layer_id->spatial_layer_id == 0) {
1075 // Set LAST to slot 5 and reference LAST.
1076 // Set GOLDEN to slot 3 and update slot 3.
1077 // Set all other buffer_idx to 0.
1078 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1079 ref_frame_config->ref_idx[i] = 0;
1080 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1081 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1082 ref_frame_config->refresh[3] = 1;
1083 } else if (layer_id->spatial_layer_id == 1) {
1084 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1085 // GOLDEN to slot 3. No update.
1086 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1087 ref_frame_config->ref_idx[i] = 0;
1088 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1089 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1090 }
1091 }
1092 break;
1093 case 8:
1094 // 3 spatial and 3 temporal layer.
1095 // Same as case 9 but with overlap in the buffer slot updates.
1096 // (shift = 2). The slots 3 and 4 updated by first TL2 are
1097 // reused for update in TL1 superframe.
1098 // Note for this case, frame order hint must be disabled for
1099 // lower resolutions (operating points > 0) to be decodable.
1100 case 9:
1101 // 3 spatial and 3 temporal layer.
1102 // No overlap in buffer updates between TL2 and TL1.
1103 // TL2 updates slot 3 and 4, TL1 updates 5, 6, 7.
1104 // Set the references via the svc_ref_frame_config control.
1105 // Always reference LAST.
1106 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1107 if (superframe_cnt % 4 == 0) {
1108 // Base temporal layer.
1109 layer_id->temporal_layer_id = 0;
1110 if (layer_id->spatial_layer_id == 0) {
1111 // Reference LAST, update LAST.
1112 // Set all buffer_idx to 0.
1113 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1114 ref_frame_config->ref_idx[i] = 0;
1115 ref_frame_config->refresh[0] = 1;
1116 } else if (layer_id->spatial_layer_id == 1) {
1117 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1118 // GOLDEN (and all other refs) to slot 0.
1119 // Update slot 1 (LAST).
1120 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1121 ref_frame_config->ref_idx[i] = 0;
1122 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1123 ref_frame_config->refresh[1] = 1;
1124 } else if (layer_id->spatial_layer_id == 2) {
1125 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1126 // GOLDEN (and all other refs) to slot 1.
1127 // Update slot 2 (LAST).
1128 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1129 ref_frame_config->ref_idx[i] = 1;
1130 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1131 ref_frame_config->refresh[2] = 1;
1132 }
1133 } else if ((superframe_cnt - 1) % 4 == 0) {
1134 // First top temporal enhancement layer.
1135 layer_id->temporal_layer_id = 2;
1136 if (layer_id->spatial_layer_id == 0) {
1137 // Reference LAST (slot 0).
1138 // Set GOLDEN to slot 3 and update slot 3.
1139 // Set all other buffer_idx to slot 0.
1140 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1141 ref_frame_config->ref_idx[i] = 0;
1142 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1143 ref_frame_config->refresh[3] = 1;
1144 } else if (layer_id->spatial_layer_id == 1) {
1145 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1146 // GOLDEN (and all other refs) to slot 3.
1147 // Set LAST2 to slot 4 and Update slot 4.
1148 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1149 ref_frame_config->ref_idx[i] = 3;
1150 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1151 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1152 ref_frame_config->refresh[4] = 1;
1153 } else if (layer_id->spatial_layer_id == 2) {
1154 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1155 // GOLDEN (and all other refs) to slot 4.
1156 // No update.
1157 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1158 ref_frame_config->ref_idx[i] = 4;
1159 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1160 }
1161 } else if ((superframe_cnt - 2) % 4 == 0) {
1162 // Middle temporal enhancement layer.
1163 layer_id->temporal_layer_id = 1;
1164 if (layer_id->spatial_layer_id == 0) {
1165 // Reference LAST.
1166 // Set all buffer_idx to 0.
1167 // Set GOLDEN to slot 5 and update slot 5.
1168 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1169 ref_frame_config->ref_idx[i] = 0;
1170 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1171 ref_frame_config->refresh[5 - shift] = 1;
1172 } else if (layer_id->spatial_layer_id == 1) {
1173 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1174 // GOLDEN (and all other refs) to slot 5.
1175 // Set LAST3 to slot 6 and update slot 6.
1176 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1177 ref_frame_config->ref_idx[i] = 5 - shift;
1178 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1179 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1180 ref_frame_config->refresh[6 - shift] = 1;
1181 } else if (layer_id->spatial_layer_id == 2) {
1182 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1183 // GOLDEN (and all other refs) to slot 6.
1184 // Set LAST3 to slot 7 and update slot 7.
1185 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1186 ref_frame_config->ref_idx[i] = 6 - shift;
1187 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1188 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 7 - shift;
1189 ref_frame_config->refresh[7 - shift] = 1;
1190 }
1191 } else if ((superframe_cnt - 3) % 4 == 0) {
1192 // Second top temporal enhancement layer.
1193 layer_id->temporal_layer_id = 2;
1194 if (layer_id->spatial_layer_id == 0) {
1195 // Set LAST to slot 5 and reference LAST.
1196 // Set GOLDEN to slot 3 and update slot 3.
1197 // Set all other buffer_idx to 0.
1198 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1199 ref_frame_config->ref_idx[i] = 0;
1200 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1201 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1202 ref_frame_config->refresh[3] = 1;
1203 } else if (layer_id->spatial_layer_id == 1) {
1204 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1205 // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
1206 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1207 ref_frame_config->ref_idx[i] = 0;
1208 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1209 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1210 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1211 ref_frame_config->refresh[4] = 1;
1212 } else if (layer_id->spatial_layer_id == 2) {
1213 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7,
1214 // GOLDEN to slot 4. No update.
1215 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1216 ref_frame_config->ref_idx[i] = 0;
1217 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 7 - shift;
1218 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 4;
1219 }
1220 }
1221 break;
1222 case 11:
1223 // Simulcast mode for 3 spatial and 3 temporal layers.
1224 // No inter-layer prediction, only prediction is temporal and single
1225 // reference (LAST).
1226 // No overlap in buffer slots between spatial layers. So for example,
1227 // SL0 only uses slots 0 and 1.
1228 // SL1 only uses slots 2 and 3.
1229 // SL2 only uses slots 4 and 5.
1230 // All 7 references for each inter-frame must only access buffer slots
1231 // for that spatial layer.
1232 // On key (super)frames: SL1 and SL2 must have no references set
1233 // and must refresh all the slots for that layer only (so 2 and 3
1234 // for SL1, 4 and 5 for SL2). The base SL0 will be labelled internally
1235 // as a Key frame (refresh all slots). SL1/SL2 will be labelled
1236 // internally as Intra-only frames that allow that stream to be decoded.
1237 // These conditions will allow for each spatial stream to be
1238 // independently decodeable.
1239
1240 // Initialize all references to 0 (don't use reference).
1241 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1242 ref_frame_config->reference[i] = 0;
1243 // Initialize as no refresh/update for all slots.
1244 for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
1245 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1246 ref_frame_config->ref_idx[i] = 0;
1247
1248 if (is_key_frame) {
1249 if (layer_id->spatial_layer_id == 0) {
1250 // Assign LAST/GOLDEN to slot 0/1.
1251 // Refresh slots 0 and 1 for SL0.
1252 // SL0: this will get set to KEY frame internally.
1253 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1254 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 1;
1255 ref_frame_config->refresh[0] = 1;
1256 ref_frame_config->refresh[1] = 1;
1257 } else if (layer_id->spatial_layer_id == 1) {
1258 // Assign LAST/GOLDEN to slot 2/3.
1259 // Refresh slots 2 and 3 for SL1.
1260 // This will get set to Intra-only frame internally.
1261 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1262 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1263 ref_frame_config->refresh[2] = 1;
1264 ref_frame_config->refresh[3] = 1;
1265 } else if (layer_id->spatial_layer_id == 2) {
1266 // Assign LAST/GOLDEN to slot 4/5.
1267 // Refresh slots 4 and 5 for SL2.
1268 // This will get set to Intra-only frame internally.
1269 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1270 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5;
1271 ref_frame_config->refresh[4] = 1;
1272 ref_frame_config->refresh[5] = 1;
1273 }
1274 } else if (superframe_cnt % 4 == 0) {
1275 // Base temporal layer: TL0
1276 layer_id->temporal_layer_id = 0;
1277 if (layer_id->spatial_layer_id == 0) { // SL0
1278 // Reference LAST. Assign all references to either slot
1279 // 0 or 1. Here we assign LAST to slot 0, all others to 1.
1280 // Update slot 0 (LAST).
1281 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1282 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1283 ref_frame_config->ref_idx[i] = 1;
1284 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1285 ref_frame_config->refresh[0] = 1;
1286 } else if (layer_id->spatial_layer_id == 1) { // SL1
1287 // Reference LAST. Assign all references to either slot
1288 // 2 or 3. Here we assign LAST to slot 2, all others to 3.
1289 // Update slot 2 (LAST).
1290 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1291 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1292 ref_frame_config->ref_idx[i] = 3;
1293 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1294 ref_frame_config->refresh[2] = 1;
1295 } else if (layer_id->spatial_layer_id == 2) { // SL2
1296 // Reference LAST. Assign all references to either slot
1297 // 4 or 5. Here we assign LAST to slot 4, all others to 5.
1298 // Update slot 4 (LAST).
1299 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1300 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1301 ref_frame_config->ref_idx[i] = 5;
1302 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1303 ref_frame_config->refresh[4] = 1;
1304 }
1305 } else if ((superframe_cnt - 1) % 4 == 0) {
1306 // First top temporal enhancement layer: TL2
1307 layer_id->temporal_layer_id = 2;
1308 if (layer_id->spatial_layer_id == 0) { // SL0
1309 // Reference LAST (slot 0). Assign other references to slot 1.
1310 // No update/refresh on any slots.
1311 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1312 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1313 ref_frame_config->ref_idx[i] = 1;
1314 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1315 } else if (layer_id->spatial_layer_id == 1) { // SL1
1316 // Reference LAST (slot 2). Assign other references to slot 3.
1317 // No update/refresh on any slots.
1318 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1319 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1320 ref_frame_config->ref_idx[i] = 3;
1321 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1322 } else if (layer_id->spatial_layer_id == 2) { // SL2
1323 // Reference LAST (slot 4). Assign other references to slot 5.
1324 // No update/refresh on any slots.
1325 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1326 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1327 ref_frame_config->ref_idx[i] = 5;
1328 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1329 }
1330 } else if ((superframe_cnt - 2) % 4 == 0) {
1331 // Middle temporal enhancement layer: TL1
1332 layer_id->temporal_layer_id = 1;
1333 if (layer_id->spatial_layer_id == 0) { // SL0
1334 // Reference LAST (slot 0).
1335 // Set GOLDEN to slot 1 and update slot 1.
1336 // This will be used as reference for next TL2.
1337 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1338 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1339 ref_frame_config->ref_idx[i] = 1;
1340 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1341 ref_frame_config->refresh[1] = 1;
1342 } else if (layer_id->spatial_layer_id == 1) { // SL1
1343 // Reference LAST (slot 2).
1344 // Set GOLDEN to slot 3 and update slot 3.
1345 // This will be used as reference for next TL2.
1346 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1347 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1348 ref_frame_config->ref_idx[i] = 3;
1349 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1350 ref_frame_config->refresh[3] = 1;
1351 } else if (layer_id->spatial_layer_id == 2) { // SL2
1352 // Reference LAST (slot 4).
1353 // Set GOLDEN to slot 5 and update slot 5.
1354 // This will be used as reference for next TL2.
1355 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1356 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1357 ref_frame_config->ref_idx[i] = 5;
1358 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1359 ref_frame_config->refresh[5] = 1;
1360 }
1361 } else if ((superframe_cnt - 3) % 4 == 0) {
1362 // Second top temporal enhancement layer: TL2
1363 layer_id->temporal_layer_id = 2;
1364 if (layer_id->spatial_layer_id == 0) { // SL0
1365 // Reference LAST (slot 1). Assign other references to slot 0.
1366 // No update/refresh on any slots.
1367 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1368 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1369 ref_frame_config->ref_idx[i] = 0;
1370 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1371 } else if (layer_id->spatial_layer_id == 1) { // SL1
1372 // Reference LAST (slot 3). Assign other references to slot 2.
1373 // No update/refresh on any slots.
1374 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1375 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1376 ref_frame_config->ref_idx[i] = 2;
1377 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 3;
1378 } else if (layer_id->spatial_layer_id == 2) { // SL2
1379 // Reference LAST (slot 5). Assign other references to slot 4.
1380 // No update/refresh on any slots.
1381 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1382 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1383 ref_frame_config->ref_idx[i] = 4;
1384 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5;
1385 }
1386 }
1387 if (!simulcast_mode && layer_id->spatial_layer_id > 0) {
1388 // Always reference GOLDEN (inter-layer prediction).
1389 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
1390 if (ksvc_mode) {
1391 // KSVC: only keep the inter-layer reference (GOLDEN) for
1392 // superframes whose base is key.
1393 if (!is_key_frame) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
1394 }
1395 if (is_key_frame && layer_id->spatial_layer_id > 1) {
1396 // On superframes whose base is key: remove LAST to avoid prediction
1397 // off layer two levels below.
1398 ref_frame_config->reference[SVC_LAST_FRAME] = 0;
1399 }
1400 }
1401 // For 3 spatial layer case 8 (where there is free buffer slot):
1402 // allow for top spatial layer to use additional temporal reference.
1403 // Additional reference is only updated on base temporal layer, every
1404 // 10 TL0 frames here.
1405 if (!simulcast_mode && enable_longterm_temporal_ref &&
1406 layer_id->spatial_layer_id == 2 && layering_mode == 8) {
1407 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
1408 if (!is_key_frame) ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
1409 if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0)
1410 ref_frame_config->refresh[REF_FRAMES - 1] = 1;
1411 }
1412 break;
1413 default: assert(0); die("Error: Unsupported temporal layering mode!\n");
1414 }
1415 for (i = 0; i < REF_FRAMES; i++) {
1416 if (ref_frame_config->refresh[i] == 1) {
1417 *reference_updated = 1;
1418 break;
1419 }
1420 }
1421}
1422
1423static void write_literal(struct aom_write_bit_buffer *wb, uint32_t data,
1424 uint8_t bits, uint32_t offset = 0) {
1425 if (bits > 32) {
1426 die("Invalid bits value %d > 32\n", bits);
1427 }
1428 const uint32_t max = static_cast<uint32_t>(((uint64_t)1 << bits) - 1);
1429 if (data < offset || (data - offset) > max) {
1430 die("Invalid data, value %u out of range [%u, %" PRIu64 "]\n", data, offset,
1431 (uint64_t)max + offset);
1432 }
1433 aom_wb_write_unsigned_literal(wb, data - offset, bits);
1434}
1435
1436static void write_depth_representation_element(
1437 struct aom_write_bit_buffer *buffer,
1438 const std::pair<libaom_examples::DepthRepresentationElement, bool>
1439 &element) {
1440 if (!element.second) {
1441 return;
1442 }
1443 write_literal(buffer, element.first.sign_flag, 1);
1444 write_literal(buffer, element.first.exponent, 7);
1445 if (element.first.mantissa_len == 0 || element.first.mantissa_len > 32) {
1446 die("Invalid mantissan_len %d\n", element.first.mantissa_len);
1447 }
1448 write_literal(buffer, element.first.mantissa_len - 1, 5);
1449 write_literal(buffer, element.first.mantissa, element.first.mantissa_len);
1450}
1451
1452static void write_color_properties(
1453 struct aom_write_bit_buffer *buffer,
1454 const std::pair<libaom_examples::ColorProperties, bool> &color_properties) {
1455 write_literal(buffer, color_properties.second, 1);
1456 if (color_properties.second) {
1457 write_literal(buffer, color_properties.first.color_range, 1);
1458 write_literal(buffer, color_properties.first.color_primaries, 8);
1459 write_literal(buffer, color_properties.first.transfer_characteristics, 8);
1460 write_literal(buffer, color_properties.first.matrix_coefficients, 8);
1461 } else {
1462 write_literal(buffer, 0, 1); // reserved_1bit
1463 }
1464}
1465
1466static void add_multilayer_metadata(
1467 aom_image_t *frame, const libaom_examples::MultilayerMetadata &multilayer) {
1468 // Large enough buffer for the multilayer metadata.
1469 // Each layer's metadata is less than 100 bytes and there are at most 4
1470 // layers.
1471 std::vector<uint8_t> data(1024);
1472 struct aom_write_bit_buffer buffer = { data.data(), 0 };
1473
1474 write_literal(&buffer, multilayer.use_case, 6);
1475 if (multilayer.layers.empty()) {
1476 die("Invalid multilayer metadata, no layers found\n");
1477 } else if (multilayer.layers.size() > MAX_NUM_SPATIAL_LAYERS) {
1478 die("Invalid multilayer metadata, too many layers (max is %d)\n",
1479 MAX_NUM_SPATIAL_LAYERS);
1480 }
1481 write_literal(&buffer, (int)multilayer.layers.size() - 1, 2);
1482 assert(buffer.bit_offset % 8 == 0);
1483 for (size_t i = 0; i < multilayer.layers.size(); ++i) {
1484 const libaom_examples::LayerMetadata &layer = multilayer.layers[i];
1485 // Alpha info with segmentation with labels can be up to about 66k bytes,
1486 // which requires 3 bytes to encode in leb128.
1487 const int bytes_reserved_for_size = 3;
1488 // Placeholder for layer_metadata_size which will be written later.
1489 write_literal(&buffer, 0, bytes_reserved_for_size * 8);
1490 const uint32_t metadata_start = buffer.bit_offset;
1491 write_literal(&buffer, (int)i, 2); // ml_spatial_id
1492 write_literal(&buffer, layer.layer_type, 5);
1493 write_literal(&buffer, layer.luma_plane_only_flag, 1);
1494 write_literal(&buffer, layer.layer_view_type, 3);
1495 write_literal(&buffer, layer.group_id, 2);
1496 write_literal(&buffer, layer.layer_dependency_idc, 3);
1497 write_literal(&buffer, layer.layer_metadata_scope, 2);
1498 write_literal(&buffer, 0, 4); // ml_reserved_4bits
1499
1500 if (i > 0) {
1501 write_color_properties(&buffer, layer.layer_color_description);
1502 } else {
1503 write_literal(&buffer, 0, 2); // ml_reserved_2bits
1504 }
1505 assert(buffer.bit_offset % 8 == 0);
1506
1507 if (layer.layer_type == libaom_examples::MULTILAYER_LAYER_TYPE_ALPHA &&
1508 layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
1509 const libaom_examples::AlphaInformation &alpha_info =
1510 layer.global_alpha_info;
1511 write_literal(&buffer, alpha_info.alpha_use_idc, 2);
1512 write_literal(&buffer, alpha_info.alpha_simple_flag, 1);
1513 if (!alpha_info.alpha_simple_flag) {
1514 write_literal(&buffer, alpha_info.alpha_bit_depth, 3, /*offset=*/8);
1515 write_literal(&buffer, alpha_info.alpha_clip_idc, 2);
1516 write_literal(&buffer, alpha_info.alpha_incr_flag, 1);
1517 write_literal(&buffer, alpha_info.alpha_transparent_value,
1518 alpha_info.alpha_bit_depth + 1);
1519 write_literal(&buffer, alpha_info.alpha_opaque_value,
1520 alpha_info.alpha_bit_depth + 1);
1521 if (buffer.bit_offset % 8 != 0) {
1522 // ai_byte_alignment_bits
1523 write_literal(&buffer, 0, 8 - (buffer.bit_offset % 8));
1524 }
1525 assert(buffer.bit_offset % 8 == 0);
1526
1527 write_literal(&buffer, 0, 6); // ai_reserved_6bits
1528 write_color_properties(&buffer, alpha_info.alpha_color_description);
1529 } else {
1530 write_literal(&buffer, 0, 5); // ai_reserved_5bits
1531 }
1532
1533 assert(buffer.bit_offset % 8 == 0);
1534 } else if (layer.layer_type ==
1535 libaom_examples::MULTILAYER_LAYER_TYPE_DEPTH &&
1536 layer.layer_metadata_scope >= libaom_examples::SCOPE_GLOBAL) {
1537 const libaom_examples::DepthInformation &depth_info =
1538 layer.global_depth_info;
1539 write_literal(&buffer, depth_info.z_near.second, 1);
1540 write_literal(&buffer, depth_info.z_far.second, 1);
1541 write_literal(&buffer, depth_info.d_min.second, 1);
1542 write_literal(&buffer, depth_info.d_max.second, 1);
1543 write_literal(&buffer, depth_info.depth_representation_type, 4);
1544 if (depth_info.d_min.second || depth_info.d_max.second) {
1545 write_literal(&buffer, depth_info.disparity_ref_view_id, 2);
1546 }
1547 write_depth_representation_element(&buffer, depth_info.z_near);
1548 write_depth_representation_element(&buffer, depth_info.z_far);
1549 write_depth_representation_element(&buffer, depth_info.d_min);
1550 write_depth_representation_element(&buffer, depth_info.d_max);
1551 if (buffer.bit_offset % 8 != 0) {
1552 write_literal(&buffer, 0, 8 - (buffer.bit_offset % 8));
1553 }
1554 assert(buffer.bit_offset % 8 == 0);
1555 }
1556
1557 assert(buffer.bit_offset % 8 == 0);
1558
1559 const int metadata_size_bytes = (buffer.bit_offset - metadata_start) / 8;
1560 const uint8_t size_pos = metadata_start / 8 - bytes_reserved_for_size;
1561 size_t coded_size;
1562 if (aom_uleb_encode_fixed_size(metadata_size_bytes, bytes_reserved_for_size,
1563 bytes_reserved_for_size,
1564 &buffer.bit_buffer[size_pos], &coded_size)) {
1565 // Need to increase bytes_reserved_for_size in the code above.
1566 die("Error: Failed to write metadata size\n");
1567 }
1568 }
1569 assert(buffer.bit_offset % 8 == 0);
1570 if (aom_img_add_metadata(frame, 33 /*METADATA_TYPE_MULTILAYER*/,
1571 buffer.bit_buffer, buffer.bit_offset / 8,
1573 die("Error: Failed to add metadata\n");
1574 }
1575}
1576
1577#if CONFIG_AV1_DECODER
// test_decode: compares the encoder's and the decoder's most recent frame and
// reports the first differing sample position per plane on stderr.
//   encoder, decoder: codec contexts whose frames are compared.
//   frames_out: frame number, used only in the diagnostic message.
//   Returns 1 if the two images differ, 0 otherwise.
// NOTE(review): this listing appears to have lost lines during extraction (the
// embedded numbers skip 1586-1587, 1594 and 1603). In the visible text enc_img
// and dec_img are never filled in, and the argument lists at 1595 and 1604 have
// no visible callee. Presumably these are the frame-fetch controls and the
// image-allocation calls; confirm against the upstream file before editing.
1578// Returns whether there is a mismatch between the encoder's new frame and the
1579// decoder's new frame.
1580static int test_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder,
1581 const int frames_out) {
1582 aom_image_t enc_img, dec_img;
1583 int mismatch = 0;
1584
1585 /* Get the internal new frame */
1588
1589#if CONFIG_AV1_HIGHBITDEPTH
// Only entered when exactly one of the two images carries the high-bit-depth
// flag. The flagged image is then replaced by a second image whose format has
// the flag removed, so both sides can be compared in the same format.
1590 if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
1591 (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
1592 if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1593 aom_image_t enc_hbd_img;
1595 &enc_hbd_img,
1596 static_cast<aom_img_fmt_t>(enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1597 enc_img.d_w, enc_img.d_h, 16);
1598 aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
1599 enc_img = enc_hbd_img;
1600 }
1601 if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1602 aom_image_t dec_hbd_img;
1604 &dec_hbd_img,
1605 static_cast<aom_img_fmt_t>(dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1606 dec_img.d_w, dec_img.d_h, 16);
1607 aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
1608 dec_img = dec_hbd_img;
1609 }
1610 }
1611#endif
1612
// aom_compare_img() returning zero is treated as "images differ"; the
// aom_find_mismatch*() helpers then fill y/u/v with four ints per plane
// (presumably position and the two sample values -- confirm in tools_common).
1613 if (!aom_compare_img(&enc_img, &dec_img)) {
1614 int y[4], u[4], v[4];
1615#if CONFIG_AV1_HIGHBITDEPTH
1616 if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1617 aom_find_mismatch_high(&enc_img, &dec_img, y, u, v);
1618 } else {
1619 aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1620 }
1621#else
1622 aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1623#endif
1624 fprintf(stderr,
1625 "Encode/decode mismatch on frame %d at"
1626 " Y[%d, %d] {%d/%d},"
1627 " U[%d, %d] {%d/%d},"
1628 " V[%d, %d] {%d/%d}\n",
1629 frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0],
1630 v[1], v[2], v[3]);
1631 mismatch = 1;
1632 }
1633
// Both images are released on every path before returning the verdict.
1634 aom_img_free(&enc_img);
1635 aom_img_free(&dec_img);
1636 return mismatch;
1637}
1638#endif // CONFIG_AV1_DECODER
1639
// Running PSNR accumulators for the encoded stream.
// Every array has two slots: slot 0 is the one used by this example, slot 1 is
// reserved for high bitdepth.
struct psnr_stats {
  // Sum of the squared-error values reported by the PSNR packets.
  uint64_t psnr_sse_total[2];
  // Sum of the sample counts reported by the PSNR packets.
  uint64_t psnr_samples_total[2];
  // Per-slot sums of the four PSNR values carried by each PSNR packet.
  double psnr_totals[2][4];
  // Number of PSNR packets accumulated so far.
  int psnr_count[2];
};
1647
1648static void show_psnr(struct psnr_stats *psnr_stream, double peak) {
1649 double ovpsnr;
1650
1651 if (!psnr_stream->psnr_count[0]) return;
1652
1653 fprintf(stderr, "\nPSNR (Overall/Avg/Y/U/V)");
1654 ovpsnr = sse_to_psnr((double)psnr_stream->psnr_samples_total[0], peak,
1655 (double)psnr_stream->psnr_sse_total[0]);
1656 fprintf(stderr, " %.3f", ovpsnr);
1657
1658 for (int i = 0; i < 4; i++) {
1659 fprintf(stderr, " %.3f",
1660 psnr_stream->psnr_totals[0][i] / psnr_stream->psnr_count[0]);
1661 }
1662 fprintf(stderr, "\n");
1663}
1664
1665static aom::AV1RateControlRtcConfig create_rtc_rc_config(
1666 const aom_codec_enc_cfg_t &cfg, const AppInput &app_input) {
1667 aom::AV1RateControlRtcConfig rc_cfg;
1668 rc_cfg.width = cfg.g_w;
1669 rc_cfg.height = cfg.g_h;
1670 rc_cfg.max_quantizer = cfg.rc_max_quantizer;
1671 rc_cfg.min_quantizer = cfg.rc_min_quantizer;
1672 rc_cfg.target_bandwidth = cfg.rc_target_bitrate;
1673 rc_cfg.buf_initial_sz = cfg.rc_buf_initial_sz;
1674 rc_cfg.buf_optimal_sz = cfg.rc_buf_optimal_sz;
1675 rc_cfg.buf_sz = cfg.rc_buf_sz;
1676 rc_cfg.overshoot_pct = cfg.rc_overshoot_pct;
1677 rc_cfg.undershoot_pct = cfg.rc_undershoot_pct;
1678 // This is hardcoded as AOME_SET_MAX_INTRA_BITRATE_PCT
1679 rc_cfg.max_intra_bitrate_pct = 300;
1680 rc_cfg.framerate = cfg.g_timebase.den;
1681 // TODO(jianj): Add suppor for SVC.
1682 rc_cfg.ss_number_layers = 1;
1683 rc_cfg.ts_number_layers = 1;
1684 rc_cfg.scaling_factor_num[0] = 1;
1685 rc_cfg.scaling_factor_den[0] = 1;
1686 rc_cfg.layer_target_bitrate[0] = static_cast<int>(rc_cfg.target_bandwidth);
1687 rc_cfg.max_quantizers[0] = rc_cfg.max_quantizer;
1688 rc_cfg.min_quantizers[0] = rc_cfg.min_quantizer;
1689 rc_cfg.aq_mode = app_input.aq_mode;
1690
1691 return rc_cfg;
1692}
1693
// Maps an internal qindex (0-255 range) back to the external quantizer scale
// (0-63 range).
// Returns the smallest quantizer whose table entry is greater than or equal to
// |qindex|; values above the last table entry map to 63.
static int qindex_to_quantizer(int qindex) {
  // Entry q holds the qindex that corresponds to external quantizer q.
  static const int kQindexOfQuantizer[64] = {
    0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
    52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96,  100,
    104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
    156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
    208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
  };
  const int last_quantizer = 63;

  int q = 0;
  // Advance until the table entry reaches |qindex| or the table is exhausted.
  while (q < last_quantizer && kQindexOfQuantizer[q] < qindex) {
    ++q;
  }
  return q;
}
1709
1710static void set_active_map(const aom_codec_enc_cfg_t *cfg,
1711 aom_codec_ctx_t *codec, int frame_cnt) {
1712 aom_active_map_t map = { 0, 0, 0 };
1713
1714 map.rows = (cfg->g_h + 15) / 16;
1715 map.cols = (cfg->g_w + 15) / 16;
1716
1717 map.active_map = (uint8_t *)malloc(map.rows * map.cols);
1718 if (!map.active_map) die("Failed to allocate active map");
1719
1720 // Example map for testing.
1721 for (unsigned int i = 0; i < map.rows; ++i) {
1722 for (unsigned int j = 0; j < map.cols; ++j) {
1723 int index = map.cols * i + j;
1724 map.active_map[index] = 1;
1725 if (frame_cnt < 300) {
1726 if (i < map.rows / 2 && j < map.cols / 2) map.active_map[index] = 0;
1727 } else if (frame_cnt >= 300) {
1728 if (i < map.rows / 2 && j >= map.cols / 2) map.active_map[index] = 0;
1729 }
1730 }
1731 }
1732
1733 if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map))
1734 die_codec(codec, "Failed to set active map");
1735
1736 free(map.active_map);
1737}
1738
1739static void set_roi_map(const aom_codec_enc_cfg_t *cfg, aom_codec_ctx_t *codec,
1740 int roi_feature) {
1742 const int block_size = 4;
1743 roi.rows = (cfg->g_h + block_size - 1) / block_size;
1744 roi.cols = (cfg->g_w + block_size - 1) / block_size;
1745 memset(&roi.skip, 0, sizeof(roi.skip));
1746 memset(&roi.delta_q, 0, sizeof(roi.delta_q));
1747 memset(&roi.delta_lf, 0, sizeof(roi.delta_lf));
1748 memset(roi.ref_frame, -1, sizeof(roi.ref_frame));
1749 // Set ROI map to be 1 (segment #1) in middle square of image,
1750 // 0 elsewhere.
1751 roi.enabled = 1;
1752 roi.roi_map = (uint8_t *)calloc(roi.rows * roi.cols, sizeof(*roi.roi_map));
1753 for (unsigned int i = 0; i < roi.rows; ++i) {
1754 for (unsigned int j = 0; j < roi.cols; ++j) {
1755 const int idx = i * roi.cols + j;
1756 if (i > roi.rows / 4 && i < (3 * roi.rows) / 4 && j > roi.cols / 4 &&
1757 j < (3 * roi.cols) / 4)
1758 roi.roi_map[idx] = 1;
1759 else
1760 roi.roi_map[idx] = 0;
1761 }
1762 }
1763 // Set the ROI feature, on segment #1.
1764 if (roi_feature == kSkip)
1765 roi.skip[1] = 1;
1766 else if (roi_feature == kDeltaQ)
1767 roi.delta_q[1] = -40;
1768 else if (roi_feature == kDeltaLF)
1769 roi.delta_lf[1] = 40;
1770 else if (roi_feature == kReference)
1771 roi.ref_frame[1] = 4; // GOLDEN_FRAME
1772
1773 if (aom_codec_control(codec, AOME_SET_ROI_MAP, &roi))
1774 die_codec(codec, "Failed to set roi map");
1775
1776 free(roi.roi_map);
1777}
// main: entry point of the multi-layer (SVC) real-time encoding example.
// Visible flow:
//   1. Set default input/encoder settings, then parse the command line.
//   2. Validate layer counts and target bitrates, set frame-rate factors and
//      optionally load multilayer metadata.
//   3. Open one output file per (spatial, temporal) layer plus one file that
//      receives every packet.
//   4. Initialize the encoder (and optionally a decoder), apply controls and
//      SVC parameters.
//   5. For every input frame, loop over the spatial layers: set the layer
//      id/pattern, encode, write packets, update rate-control statistics and
//      optionally decode and check for encoder/decoder mismatch.
//   6. Print rate-control, timing and PSNR summaries and release resources.
// Returns EXIT_SUCCESS; fatal errors terminate through die()/die_codec().
// NOTE(review): this listing has lost lines during extraction (the embedded
// numbers skip, e.g. 1784, 1848-1849, 1997-2005). Names such as cfg, encoder,
// res and exec_name are used below without a visible declaration. Confirm
// against the upstream file before changing any code here.
1778int main(int argc, const char **argv) {
1779 AppInput app_input;
1780 AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL };
1781 FILE *obu_files[AOM_MAX_LAYERS] = { NULL };
1782 AvxVideoWriter *total_layer_file = NULL;
1783 FILE *total_layer_obu_file = NULL;
1785 int frame_cnt = 0;
1786 aom_image_t raw;
1787 int frame_avail;
1788 int got_data = 0;
1789 int flags = 0;
1790 int i;
1791 int pts = 0; // PTS starts at 0.
1792 int frame_duration = 1; // 1 timebase tick per frame.
1793 aom_svc_layer_id_t layer_id;
1794 aom_svc_params_t svc_params;
1795 aom_svc_ref_frame_config_t ref_frame_config;
1796 aom_svc_ref_frame_comp_pred_t ref_frame_comp_pred;
1797
1798#if CONFIG_INTERNAL_STATS
1799 FILE *stats_file = fopen("opsnr.stt", "a");
1800 if (stats_file == NULL) {
1801 die("Cannot open opsnr.stt\n");
1802 }
1803#endif
1804#if CONFIG_AV1_DECODER
1805 aom_codec_ctx_t decoder;
1806#endif
1807
1808 struct RateControlMetrics rc;
1809 int64_t cx_time = 0;
1810 int64_t cx_time_layer[AOM_MAX_LAYERS]; // max number of layers.
1811 int frame_cnt_layer[AOM_MAX_LAYERS];
1812 double sum_bitrate = 0.0;
1813 double sum_bitrate2 = 0.0;
1814 double framerate = 30.0;
1815 int use_svc_control = 1;
1816 int set_err_resil_frame = 0;
1817 int test_changing_bitrate = 0;
1818 zero(rc.layer_target_bitrate);
1819 memset(&layer_id, 0, sizeof(aom_svc_layer_id_t));
1820 memset(&app_input, 0, sizeof(AppInput));
1821 memset(&svc_params, 0, sizeof(svc_params));
1822
// The four flags below are compile-time switches for optional test paths; all
// are off (0) in this listing.
1823 // Flag to test dynamic scaling of source frames for single
1824 // spatial stream, using the scaling_mode control.
1825 const int test_dynamic_scaling_single_layer = 0;
1826
1827 // Flag to test setting speed per layer.
1828 const int test_speed_per_layer = 0;
1829
1830 // Flag for testing active maps.
1831 const int test_active_maps = 0;
1832
1833 // Flag for testing roi map.
1834 const int test_roi_map = 0;
1835
1836 /* Setup default input stream settings */
1837 for (i = 0; i < MAX_NUM_SPATIAL_LAYERS; ++i) {
1838 app_input.input_ctx[i].framerate.numerator = 30;
1839 app_input.input_ctx[i].framerate.denominator = 1;
1840 app_input.input_ctx[i].only_i420 = 0;
1841 app_input.input_ctx[i].bit_depth = AOM_BITS_8;
1842 }
1843 app_input.speed = 7;
1844 exec_name = argv[0];
1845
1846 // start with default encoder configuration
1847#if GOOD_QUALITY
1850#else
1853#endif
1854 if (res != AOM_CODEC_OK) {
1855 die("Failed to get config: %s\n", aom_codec_err_to_string(res));
1856 }
1857
1858#if GOOD_QUALITY
1860#else
1861 // Real time parameters.
1863#endif
1864
// Baseline rate-control setup applied before the command line is parsed:
// CBR, quantizer range 2..52, no lagged frames, automatic key frames.
1865 cfg.rc_end_usage = AOM_CBR;
1866 cfg.rc_min_quantizer = 2;
1867 cfg.rc_max_quantizer = 52;
1868 cfg.rc_undershoot_pct = 50;
1869 cfg.rc_overshoot_pct = 50;
1870 cfg.rc_buf_initial_sz = 600;
1871 cfg.rc_buf_optimal_sz = 600;
1872 cfg.rc_buf_sz = 1000;
1873 cfg.rc_resize_mode = 0; // Set to RESIZE_DYNAMIC for dynamic resize.
1874 cfg.g_lag_in_frames = 0;
1875 cfg.kf_mode = AOM_KF_AUTO;
1876 cfg.g_w = 0; // Force user to specify width and height for raw input.
1877 cfg.g_h = 0;
1878
1879 parse_command_line(argc, argv, &app_input, &svc_params, &cfg);
1880
1881 int ts_number_layers = svc_params.number_temporal_layers;
1882 int ss_number_layers = svc_params.number_spatial_layers;
1883
1884 unsigned int width = cfg.g_w;
1885 unsigned int height = cfg.g_h;
1886
1887 if (app_input.layering_mode >= 0) {
1888 if (ts_number_layers !=
1889 mode_to_num_temporal_layers[app_input.layering_mode] ||
1890 ss_number_layers !=
1891 mode_to_num_spatial_layers[app_input.layering_mode]) {
1892 die("Number of layers doesn't match layering mode.");
1893 }
1894 }
1895
// NOTE(review): AppInput::input_ctx is declared with MAX_NUM_SPATIAL_LAYERS (4)
// elements, but this loop runs up to AOM_MAX_LAYERS. If AOM_MAX_LAYERS is larger
// than 4 this reads past the end of the array -- confirm and consider bounding
// the loop by MAX_NUM_SPATIAL_LAYERS.
1896 bool has_non_y4m_input = false;
1897 for (i = 0; i < AOM_MAX_LAYERS; ++i) {
1898 if (app_input.input_ctx[i].file_type != FILE_TYPE_Y4M) {
1899 has_non_y4m_input = true;
1900 break;
1901 }
1902 }
1903 // Y4M reader has its own allocation.
1904 if (has_non_y4m_input) {
1905 if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, width, height, 32)) {
1906 die("Failed to allocate image (%dx%d)", width, height);
1907 }
1908 }
1909
1911
1912 memcpy(&rc.layer_target_bitrate[0], &svc_params.layer_target_bitrate[0],
1913 sizeof(svc_params.layer_target_bitrate));
1914
// The bitrate of the highest temporal layer of each spatial layer is summed;
// that total must equal the overall target bitrate.
1915 unsigned int total_rate = 0;
1916 for (i = 0; i < ss_number_layers; i++) {
1917 total_rate +=
1918 svc_params
1919 .layer_target_bitrate[i * ts_number_layers + ts_number_layers - 1];
1920 }
1921 if (total_rate != cfg.rc_target_bitrate) {
1922 die("Incorrect total target bitrate, expected: %d", total_rate);
1923 }
1924
1925 svc_params.framerate_factor[0] = 1;
1926 if (ts_number_layers == 2) {
1927 svc_params.framerate_factor[0] = 2;
1928 svc_params.framerate_factor[1] = 1;
1929 } else if (ts_number_layers == 3) {
1930 svc_params.framerate_factor[0] = 4;
1931 svc_params.framerate_factor[1] = 2;
1932 svc_params.framerate_factor[2] = 1;
1933 }
1934
1935 libaom_examples::MultilayerMetadata multilayer_metadata;
1936 if (app_input.multilayer_metadata_file != NULL) {
1937 if (!libaom_examples::parse_multilayer_file(
1938 app_input.multilayer_metadata_file, &multilayer_metadata)) {
1939 die("Failed to parse multilayer metadata");
1940 }
1941 libaom_examples::print_multilayer_metadata(multilayer_metadata);
1942 }
1943
// NOTE(review): den / num is evaluated before the result is stored in the
// double, so if both timebase fields are integers (as they appear to be) the
// quotient is truncated -- confirm whether a fractional frame rate is wanted.
1944 framerate = cfg.g_timebase.den / cfg.g_timebase.num;
1945 set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
1946
1947 AvxVideoInfo info;
1948 info.codec_fourcc = get_fourcc_by_aom_encoder(encoder);
1949 info.frame_width = cfg.g_w;
1950 info.frame_height = cfg.g_h;
1951 info.time_base.numerator = cfg.g_timebase.num;
1952 info.time_base.denominator = cfg.g_timebase.den;
// Per-layer outputs are named "<output_filename>_<layer index>.av1"; they are
// raw OBU files when output_obu is set, IVF files otherwise.
1953 // Open an output file for each stream.
1954 for (int sl = 0; sl < ss_number_layers; ++sl) {
1955 for (int tl = 0; tl < ts_number_layers; ++tl) {
1956 i = sl * ts_number_layers + tl;
1957 char file_name[PATH_MAX];
1958 snprintf(file_name, sizeof(file_name), "%s_%d.av1",
1959 app_input.output_filename, i);
1960 if (app_input.output_obu) {
1961 obu_files[i] = fopen(file_name, "wb");
1962 if (!obu_files[i]) die("Failed to open %s for writing", file_name);
1963 } else {
1964 outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info);
1965 if (!outfile[i]) die("Failed to open %s for writing", file_name);
1966 }
1967 }
1968 }
// The file named exactly output_filename receives every encoded packet.
1969 if (app_input.output_obu) {
1970 total_layer_obu_file = fopen(app_input.output_filename, "wb");
1971 if (!total_layer_obu_file)
1972 die("Failed to open %s for writing", app_input.output_filename);
1973 } else {
1974 total_layer_file =
1975 aom_video_writer_open(app_input.output_filename, kContainerIVF, &info);
1976 if (!total_layer_file)
1977 die("Failed to open %s for writing", app_input.output_filename);
1978 }
1979
1980 // Initialize codec.
1981 aom_codec_ctx_t codec;
1982 aom_codec_flags_t flag = 0;
1984 flag |= app_input.show_psnr ? AOM_CODEC_USE_PSNR : 0;
1985 if (aom_codec_enc_init(&codec, encoder, &cfg, flag))
1986 die_codec(&codec, "Failed to initialize encoder");
1987
1988#if CONFIG_AV1_DECODER
1989 if (app_input.decode) {
1990 if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0))
1991 die_codec(&decoder, "Failed to initialize decoder");
1992 }
1993#endif
1994
// Any non-zero aq_mode option is mapped to AQ mode 3, zero to 0.
1995 aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed);
1996 aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 3 : 0);
2006#if GOOD_QUALITY
2011#else
2016#endif
2018
2019 // Settings to reduce key frame encoding time.
2025
2027
2028 aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content);
2029 if (app_input.tune_content == AOM_CONTENT_SCREEN) {
2031 // INTRABC is currently disabled for rt mode, as it's too slow.
2033 }
2034
2035 if (app_input.use_external_rc) {
2037 }
2038
2040
2043
2045
// Every layer starts with the global quantizer range from cfg.
2046 svc_params.number_spatial_layers = ss_number_layers;
2047 svc_params.number_temporal_layers = ts_number_layers;
2048 for (i = 0; i < ss_number_layers * ts_number_layers; ++i) {
2049 svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
2050 svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
2051 }
2052 // SET QUANTIZER PER LAYER, E.G FOR 2 SPATIAL LAYERS:
2053 // svc_params.max_quantizers[0] = 40;
2054 // svc_params.min_quantizers[0] = 40;
2055 // svc_params.max_quantizers[1] = 50;
2056 // svc_params.min_quantizers[1] = 50;
2057
// Default spatial scaling when none was given explicitly: 1/2 for the base
// layer with two spatial layers; 1/4 and 1/2 for the two lower layers with
// three; all other layers 1/1.
2058 if (!app_input.scale_factors_explicitly_set) {
2059 for (i = 0; i < ss_number_layers; ++i) {
2060 svc_params.scaling_factor_num[i] = 1;
2061 svc_params.scaling_factor_den[i] = 1;
2062 }
2063 if (ss_number_layers == 2) {
2064 svc_params.scaling_factor_num[0] = 1;
2065 svc_params.scaling_factor_den[0] = 2;
2066 } else if (ss_number_layers == 3) {
2067 svc_params.scaling_factor_num[0] = 1;
2068 svc_params.scaling_factor_den[0] = 4;
2069 svc_params.scaling_factor_num[1] = 1;
2070 svc_params.scaling_factor_den[1] = 2;
2071 }
2072 }
2073 aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params);
2074 // TODO(aomedia:3032): Configure KSVC in fixed mode.
2075
2076 // This controls the maximum target size of the key frame.
2077 // For generating smaller key frames, use a smaller max_intra_size_pct
2078 // value, like 100 or 200.
2079 {
2080 const int max_intra_size_pct = 300;
2082 max_intra_size_pct);
2083 }
2084
2085 for (int lx = 0; lx < ts_number_layers * ss_number_layers; lx++) {
2086 cx_time_layer[lx] = 0;
2087 frame_cnt_layer[lx] = 0;
2088 }
2089
// rc_api stays null unless the external rate controller was requested; the
// later "if (rc_api)" checks rely on that.
2090 std::unique_ptr<aom::AV1RateControlRTC> rc_api;
2091 if (app_input.use_external_rc) {
2092 const aom::AV1RateControlRtcConfig rc_cfg =
2093 create_rtc_rc_config(cfg, app_input);
2094 rc_api = aom::AV1RateControlRTC::Create(rc_cfg);
2095 }
2096
// Main encode loop: continues while input frames are available or the last
// encode call still produced a frame packet.
2097 frame_avail = 1;
2098 struct psnr_stats psnr_stream;
2099 memset(&psnr_stream, 0, sizeof(psnr_stream));
2100 while (frame_avail || got_data) {
2101 struct aom_usec_timer timer;
2102 frame_avail = read_frame(&(app_input.input_ctx[0]), &raw);
2103 // Loop over spatial layers.
2104 for (int slx = 0; slx < ss_number_layers; slx++) {
// A higher spatial layer with its own input file reads its own frame into raw;
// all layer inputs must run out of frames at the same time.
2105 if (slx > 0 && app_input.input_ctx[slx].filename != NULL) {
2106 const int previous_layer_frame_avail = frame_avail;
2107 frame_avail = read_frame(&(app_input.input_ctx[slx]), &raw);
2108 if (previous_layer_frame_avail != frame_avail) {
2109 die("Mismatch in number of frames between spatial layer input files");
2110 }
2111 }
2112
2113 aom_codec_iter_t iter = NULL;
2114 const aom_codec_cx_pkt_t *pkt;
2115 int reference_updated = 0;
2116 int layer = 0;
// NOTE(review): modulo by cfg.kf_max_dist -- this divides by zero if
// kf_max_dist can be 0; confirm it is validated elsewhere.
2117 // Flag for superframe whose base is key.
2118 int is_key_frame = (frame_cnt % cfg.kf_max_dist) == 0;
2119 // For flexible mode:
2120 if (app_input.layering_mode >= 0) {
2121 // Set the reference/update flags, layer_id, and reference_map
2122 // buffer index.
2123 set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id,
2124 &ref_frame_config, &ref_frame_comp_pred,
2125 &use_svc_control, slx, is_key_frame,
2126 (app_input.layering_mode == 10), app_input.speed,
2127 &reference_updated, test_roi_map);
2128 aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
2129 if (use_svc_control) {
2131 &ref_frame_config);
2133 &ref_frame_comp_pred);
2134 }
2135 if (app_input.multilayer_metadata_file != NULL) {
2136 add_multilayer_metadata(&raw, multilayer_metadata);
2137 }
2138 // Set the speed per layer.
2139 if (test_speed_per_layer) {
2140 int speed_per_layer = 10;
2141 if (layer_id.spatial_layer_id == 0) {
2142 if (layer_id.temporal_layer_id == 0) speed_per_layer = 6;
2143 if (layer_id.temporal_layer_id == 1) speed_per_layer = 7;
2144 if (layer_id.temporal_layer_id == 2) speed_per_layer = 8;
2145 } else if (layer_id.spatial_layer_id == 1) {
2146 if (layer_id.temporal_layer_id == 0) speed_per_layer = 7;
2147 if (layer_id.temporal_layer_id == 1) speed_per_layer = 8;
2148 if (layer_id.temporal_layer_id == 2) speed_per_layer = 9;
2149 } else if (layer_id.spatial_layer_id == 2) {
2150 if (layer_id.temporal_layer_id == 0) speed_per_layer = 8;
2151 if (layer_id.temporal_layer_id == 1) speed_per_layer = 9;
2152 if (layer_id.temporal_layer_id == 2) speed_per_layer = 10;
2153 }
2154 aom_codec_control(&codec, AOME_SET_CPUUSED, speed_per_layer);
2155 }
2156 } else {
2157 // Only up to 3 temporal layers supported in fixed mode.
2158 // Only need to set spatial and temporal layer_id: reference
2159 // prediction, refresh, and buffer_idx are set internally.
// Temporal id per frame: with 2 layers odd frames are layer 1; with 3 layers
// odd frames are layer 2 and frames 2, 6, 10, ... are layer 1; all other
// frames are layer 0.
2160 layer_id.spatial_layer_id = slx;
2161 layer_id.temporal_layer_id = 0;
2162 if (ts_number_layers == 2) {
2163 layer_id.temporal_layer_id = (frame_cnt % 2) != 0;
2164 } else if (ts_number_layers == 3) {
2165 if (frame_cnt % 2 != 0)
2166 layer_id.temporal_layer_id = 2;
2167 else if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0))
2168 layer_id.temporal_layer_id = 1;
2169 }
2170 aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
2171 }
2172
2173 if (set_err_resil_frame && cfg.g_error_resilient == 0) {
2174 // Set error_resilient per frame: off/0 for base layer and
2175 // on/1 for enhancement layer frames.
2176 // Note that this is can only be done on the fly/per-frame/layer
2177 // if the config error_resilience is off/0. See the logic for updating
2178 // in set_encoder_config():
2179 // tool_cfg->error_resilient_mode =
2180 // cfg->g_error_resilient | extra_cfg->error_resilient_mode;
2181 const int err_resil_mode =
2182 layer_id.spatial_layer_id > 0 || layer_id.temporal_layer_id > 0;
2184 err_resil_mode);
2185 }
2186
// Flat layer index used for the per-layer statistics arrays.
2187 layer = slx * ts_number_layers + layer_id.temporal_layer_id;
2188 if (frame_avail && slx == 0) ++rc.layer_input_frames[layer];
2189
2190 if (test_dynamic_scaling_single_layer) {
2191 // Example to scale source down by 2x2, then 4x4, and then back up to
2192 // 2x2, and then back to original.
2193 int frame_2x2 = 200;
2194 int frame_4x4 = 400;
2195 int frame_2x2up = 600;
2196 int frame_orig = 800;
2197 if (frame_cnt >= frame_2x2 && frame_cnt < frame_4x4) {
2198 // Scale source down by 2x2.
2199 struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
2200 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2201 } else if (frame_cnt >= frame_4x4 && frame_cnt < frame_2x2up) {
2202 // Scale source down by 4x4.
2203 struct aom_scaling_mode mode = { AOME_ONEFOUR, AOME_ONEFOUR };
2204 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2205 } else if (frame_cnt >= frame_2x2up && frame_cnt < frame_orig) {
2206 // Source back up to 2x2.
2207 struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
2208 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2209 } else if (frame_cnt >= frame_orig) {
2210 // Source back up to original resolution (no scaling).
2211 struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
2212 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
2213 }
2214 if (frame_cnt == frame_2x2 || frame_cnt == frame_4x4 ||
2215 frame_cnt == frame_2x2up || frame_cnt == frame_orig) {
2216 // For dynamic resize testing on single layer: refresh all references
2217 // on the resized frame: this is to avoid decode error:
2218 // if resize goes down by >= 4x4 then libaom decoder will throw an
2219 // error that some reference (even though not used) is beyond the
2220 // limit size (must be smaller than 4x4).
2221 for (i = 0; i < REF_FRAMES; i++) ref_frame_config.refresh[i] = 1;
2222 if (use_svc_control) {
2224 &ref_frame_config);
2226 &ref_frame_comp_pred);
2227 }
2228 }
2229 }
2230
2231 // Change target_bitrate every other frame.
2232 if (test_changing_bitrate && frame_cnt % 2 == 0) {
2233 if (frame_cnt < 500)
2234 cfg.rc_target_bitrate += 10;
2235 else
2236 cfg.rc_target_bitrate -= 10;
2237 // Do big increase and decrease.
2238 if (frame_cnt == 100) cfg.rc_target_bitrate <<= 1;
2239 if (frame_cnt == 600) cfg.rc_target_bitrate >>= 1;
2240 if (cfg.rc_target_bitrate < 100) cfg.rc_target_bitrate = 100;
2241 // Call change_config, or bypass with new control.
2242 // res = aom_codec_enc_config_set(&codec, &cfg);
2244 cfg.rc_target_bitrate))
2245 die_codec(&codec, "Failed to SET_BITRATE_ONE_PASS_CBR");
2246 }
2247
// External rate control: ask the library for a QP, convert it from the qindex
// scale to the 0-63 quantizer scale and pass it to the encoder.
2248 if (rc_api) {
2249 aom::AV1FrameParamsRTC frame_params;
2250 // TODO(jianj): Add support for SVC.
2251 frame_params.spatial_layer_id = 0;
2252 frame_params.temporal_layer_id = 0;
2253 frame_params.frame_type =
2254 is_key_frame ? aom::kKeyFrame : aom::kInterFrame;
2255 rc_api->ComputeQP(frame_params);
2256 const int current_qp = rc_api->GetQP();
2258 qindex_to_quantizer(current_qp))) {
2259 die_codec(&codec, "Failed to SET_QUANTIZER_ONE_PASS");
2260 }
2261 }
2262
2263 if (test_active_maps) set_active_map(&cfg, &codec, frame_cnt);
2264
2265 if (test_roi_map) set_roi_map(&cfg, &codec, kDeltaQ);
2266
// A NULL image is passed once the input is exhausted. The elapsed encode time
// is added to the total and to the current layer's counter.
2267 // Do the layer encode.
2268 aom_usec_timer_start(&timer);
2269 if (aom_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags))
2270 die_codec(&codec, "Failed to encode frame");
2271 aom_usec_timer_mark(&timer);
2272 cx_time += aom_usec_timer_elapsed(&timer);
2273 cx_time_layer[layer] += aom_usec_timer_elapsed(&timer);
2274 frame_cnt_layer[layer] += 1;
2275
2276 // Get the high motion content flag.
2277 int content_flag = 0;
2279 &content_flag)) {
2280 die_codec(&codec, "Failed to GET_HIGH_MOTION_CONTENT_SCREEN_RTC");
2281 }
2282
2283 got_data = 0;
2284 // For simulcast (mode 11): write out each spatial layer to the file.
2285 int ss_layers_write = (app_input.layering_mode == 11)
2286 ? layer_id.spatial_layer_id + 1
2287 : ss_number_layers;
// NOTE(review): the case label for frame packets (embedded line 2290) is
// missing from this listing; the statements up to the first "break" below
// presumably belong to it.
2288 while ((pkt = aom_codec_get_cx_data(&codec, &iter))) {
2289 switch (pkt->kind) {
// The packet is written to the file of the current layer and of every higher
// spatial/temporal layer up to ss_layers_write / ts_number_layers.
2291 for (int sl = layer_id.spatial_layer_id; sl < ss_layers_write;
2292 ++sl) {
2293 for (int tl = layer_id.temporal_layer_id; tl < ts_number_layers;
2294 ++tl) {
2295 int j = sl * ts_number_layers + tl;
2296 if (app_input.output_obu) {
2297 fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
2298 obu_files[j]);
2299 } else {
2300 aom_video_writer_write_frame(
2301 outfile[j],
2302 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2303 pkt->data.frame.sz, pts);
2304 }
2305 if (sl == layer_id.spatial_layer_id)
2306 rc.layer_encoding_bitrate[j] += 8.0 * pkt->data.frame.sz;
2307 }
2308 }
2309 got_data = 1;
2310 // Write everything into the top layer.
2311 if (app_input.output_obu) {
2312 fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
2313 total_layer_obu_file);
2314 } else {
2315 aom_video_writer_write_frame(
2316 total_layer_file,
2317 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2318 pkt->data.frame.sz, pts);
2319 }
// NOTE(review): the mismatch term divides by rc.layer_pfb[j]; confirm that
// set_rate_control_metrics() always leaves it non-zero.
2320 // Keep count of rate control stats per layer (for non-key).
2321 if (!(pkt->data.frame.flags & AOM_FRAME_IS_KEY)) {
2322 int j = layer_id.spatial_layer_id * ts_number_layers +
2323 layer_id.temporal_layer_id;
2324 assert(j >= 0);
2325 rc.layer_avg_frame_size[j] += 8.0 * pkt->data.frame.sz;
2326 rc.layer_avg_rate_mismatch[j] +=
2327 fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[j]) /
2328 rc.layer_pfb[j];
2329 if (slx == 0) ++rc.layer_enc_frames[layer_id.temporal_layer_id];
2330 }
2331
2332 if (rc_api) {
2333 rc_api->PostEncodeUpdate(pkt->data.frame.sz);
2334 }
2335 // Update for short-time encoding bitrate states, for moving window
2336 // of size rc->window, shifted by rc->window / 2.
2337 // Ignore first window segment, due to key frame.
2338 // For spatial layers: only do this for top/highest SL.
2339 if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) {
2340 sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
2341 rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size;
2342 if (frame_cnt % rc.window_size == 0) {
2343 rc.window_count += 1;
2344 rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
2345 rc.variance_st_encoding_bitrate +=
2346 (sum_bitrate / rc.window_size) *
2347 (sum_bitrate / rc.window_size);
2348 sum_bitrate = 0.0;
2349 }
2350 }
2351 // Second shifted window.
2352 if (frame_cnt > rc.window_size + rc.window_size / 2 &&
2353 slx == ss_number_layers - 1) {
2354 sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
2355 if (frame_cnt > 2 * rc.window_size &&
2356 frame_cnt % rc.window_size == 0) {
2357 rc.window_count += 1;
2358 rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
2359 rc.variance_st_encoding_bitrate +=
2360 (sum_bitrate2 / rc.window_size) *
2361 (sum_bitrate2 / rc.window_size);
2362 sum_bitrate2 = 0.0;
2363 }
2364 }
2365
2366#if CONFIG_AV1_DECODER
2367 if (app_input.decode) {
2368 if (aom_codec_decode(
2369 &decoder,
2370 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2371 pkt->data.frame.sz, NULL))
2372 die_codec(&decoder, "Failed to decode frame");
2373 }
2374#endif
2375
2376 break;
2377 case AOM_CODEC_PSNR_PKT:
2378 if (app_input.show_psnr) {
2379 psnr_stream.psnr_sse_total[0] += pkt->data.psnr.sse[0];
2380 psnr_stream.psnr_samples_total[0] += pkt->data.psnr.samples[0];
2381 for (int plane = 0; plane < 4; plane++) {
2382 psnr_stream.psnr_totals[0][plane] += pkt->data.psnr.psnr[plane];
2383 }
2384 psnr_stream.psnr_count[0]++;
2385 }
2386 break;
2387 default: break;
2388 }
2389 }
2390#if CONFIG_AV1_DECODER
2391 if (got_data && app_input.decode) {
2392 // Don't look for mismatch on non reference frames.
2393 if (reference_updated) {
2394 if (test_decode(&codec, &decoder, frame_cnt)) {
2395#if CONFIG_INTERNAL_STATS
2396 fprintf(stats_file, "First mismatch occurred in frame %d\n",
2397 frame_cnt);
2398 fclose(stats_file);
2399#endif
2400 fatal("Mismatch seen");
2401 }
2402 }
2403 }
2404#endif
2405 } // loop over spatial layers
2406 ++frame_cnt;
2407 pts += frame_duration;
2408 }
2409
// Input contexts are closed in order until the first one without a filename.
2410 for (i = 0; i < MAX_NUM_SPATIAL_LAYERS; ++i) {
2411 if (app_input.input_ctx[i].filename == NULL) {
2412 break;
2413 }
2414 close_input_file(&(app_input.input_ctx[i]));
2415 }
2416 printout_rate_control_summary(&rc, frame_cnt, ss_number_layers,
2417 ts_number_layers);
2418
// cx_time* values are accumulated from aom_usec_timer_elapsed(); the two
// figures printed per layer are time per frame (elapsed / (frames * 1000)) and
// frames per second (1000000 * frames / elapsed).
2419 printf("\n");
2420 for (int slx = 0; slx < ss_number_layers; slx++)
2421 for (int tlx = 0; tlx < ts_number_layers; tlx++) {
2422 int lx = slx * ts_number_layers + tlx;
2423 printf("Per layer encoding time/FPS stats for encoder: %d %d %d %f %f \n",
2424 slx, tlx, frame_cnt_layer[lx],
2425 (float)cx_time_layer[lx] / (double)(frame_cnt_layer[lx] * 1000),
2426 1000000 * (double)frame_cnt_layer[lx] / (double)cx_time_layer[lx]);
2427 }
2428
2429 printf("\n");
2430 printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
2431 frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
2432 1000000 * (double)frame_cnt / (double)cx_time);
2433
// The peak value is fixed at 255.0 here.
2434 if (app_input.show_psnr) {
2435 show_psnr(&psnr_stream, 255.0);
2436 }
2437
2438 if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy encoder");
2439
2440#if CONFIG_AV1_DECODER
2441 if (app_input.decode) {
2442 if (aom_codec_destroy(&decoder))
2443 die_codec(&decoder, "Failed to destroy decoder");
2444 }
2445#endif
2446
2447#if CONFIG_INTERNAL_STATS
2448 fprintf(stats_file, "No mismatch detected in recon buffers\n");
2449 fclose(stats_file);
2450#endif
2451
// NOTE(review): when output_obu is set, outfile[] and total_layer_file are
// still NULL here (confirm aom_video_writer_close() tolerates NULL), and the
// FILE handles in obu_files[] / total_layer_obu_file are never fclose()'d in
// the visible code.
2452 // Try to rewrite the output file headers with the actual frame count.
2453 for (i = 0; i < ss_number_layers * ts_number_layers; ++i)
2454 aom_video_writer_close(outfile[i]);
2455 aom_video_writer_close(total_layer_file);
2456
2457 if (has_non_y4m_input) {
2458 aom_img_free(&raw);
2459 }
2460 return EXIT_SUCCESS;
2461}
Describes the decoder algorithm interface to applications.
Describes the encoder algorithm interface to applications.
Describes the aom image descriptor and associated operations.
@ AOM_MIF_KEY_FRAME
Definition aom_image.h:166
@ AOM_CSP_UNKNOWN
Definition aom_image.h:143
enum aom_chroma_sample_position aom_chroma_sample_position_t
List of chroma sample positions.
#define AOM_IMG_FMT_HIGHBITDEPTH
Definition aom_image.h:38
aom_image_t * aom_img_alloc(aom_image_t *img, aom_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int align)
Open a descriptor, allocating storage for the underlying image.
@ AOM_IMG_FMT_I420
Definition aom_image.h:45
enum aom_img_fmt aom_img_fmt_t
List of supported image formats.
int aom_img_add_metadata(aom_image_t *img, uint32_t type, const uint8_t *data, size_t sz, aom_metadata_insert_flags_t insert_flag)
Add metadata to image.
struct aom_image aom_image_t
Image Descriptor.
void aom_img_free(aom_image_t *img)
Close an image descriptor.
Provides definitions for using AOM or AV1 encoder algorithm within the aom Codec Interface.
#define AOM_MAX_LAYERS
Definition aomcx.h:1779
struct aom_svc_params aom_svc_params_t
Parameter type for SVC.
#define AOM_MAX_TS_LAYERS
Definition aomcx.h:1781
aom_codec_iface_t * aom_codec_av1_cx(void)
The interface to the AV1 encoder.
struct aom_roi_map aom_roi_map_t
aom region of interest map
struct aom_svc_layer_id aom_svc_layer_id_t
Struct for spatial and temporal layer ID.
struct aom_active_map aom_active_map_t
aom active region map
struct aom_svc_ref_frame_comp_pred aom_svc_ref_frame_comp_pred_t
Parameters for setting ref frame compound prediction.
struct aom_svc_ref_frame_config aom_svc_ref_frame_config_t
Parameters for setting ref frame config.
@ AOM_FULL_SUPERFRAME_DROP
Definition aomcx.h:1853
@ AV1E_SET_BITRATE_ONE_PASS_CBR
Codec control to set the target bitrate in kilobits per second, unsigned int parameter....
Definition aomcx.h:1545
@ AV1E_SET_ENABLE_SMOOTH_INTRA
Codec control function to turn on / off smooth intra modes usage, int parameter.
Definition aomcx.h:1081
@ AV1E_SET_ENABLE_TPL_MODEL
Codec control function to enable RDO modulated by frame temporal dependency, unsigned int parameter.
Definition aomcx.h:418
@ AV1E_SET_AQ_MODE
Codec control function to set adaptive quantization mode, unsigned int parameter.
Definition aomcx.h:478
@ AV1E_SET_SVC_LAYER_ID
Codec control function to set the layer id, aom_svc_layer_id_t* parameter.
Definition aomcx.h:1293
@ AV1E_SET_SVC_REF_FRAME_CONFIG
Codec control function to set the reference frame config, aom_svc_ref_frame_config_t* parameter.
Definition aomcx.h:1303
@ AV1E_SET_TUNE_CONTENT
Codec control function to set content type, aom_tune_content parameter.
Definition aomcx.h:507
@ AOME_SET_ROI_MAP
Codec control function to pass an ROI map to encoder, aom_roi_map_t* parameter.
Definition aomcx.h:185
@ AV1E_SET_CDF_UPDATE_MODE
Codec control function to set CDF update mode, unsigned int parameter.
Definition aomcx.h:516
@ AV1E_SET_ENABLE_ANGLE_DELTA
Codec control function to turn on/off intra angle delta, int parameter.
Definition aomcx.h:1128
@ AV1E_SET_MV_COST_UPD_FREQ
Control to set frequency of the cost updates for motion vectors, unsigned int parameter.
Definition aomcx.h:1271
@ AV1E_SET_INTRA_DEFAULT_TX_ONLY
Control to use default tx type only for intra modes, int parameter.
Definition aomcx.h:1220
@ AV1E_SET_SVC_REF_FRAME_COMP_PRED
Codec control function to set reference frame compound prediction, aom_svc_ref_frame_comp_pred_t* parameter.
Definition aomcx.h:1408
@ AV1E_SET_ENABLE_INTRABC
Codec control function to turn on/off intra block copy mode, int parameter.
Definition aomcx.h:1124
@ AV1E_SET_ENABLE_WARPED_MOTION
Codec control function to turn on / off warped motion usage at sequence level, int parameter.
Definition aomcx.h:1049
@ AV1E_SET_RTC_EXTERNAL_RC
Codec control function to set flag for rate control used by external encoders.
Definition aomcx.h:1444
@ AV1E_SET_COEFF_COST_UPD_FREQ
Control to set frequency of the cost updates for coefficients, unsigned int parameter.
Definition aomcx.h:1251
@ AV1E_SET_ENABLE_CDEF
Codec control function to encode with CDEF, unsigned int parameter.
Definition aomcx.h:681
@ AOME_SET_ACTIVEMAP
Codec control function to pass an Active map to encoder, aom_active_map_t* parameter.
Definition aomcx.h:190
@ AV1E_SET_DV_COST_UPD_FREQ
Control to set frequency of the cost updates for intrabc motion vectors, unsigned int parameter.
Definition aomcx.h:1374
@ AV1E_SET_SVC_FRAME_DROP_MODE
Codec control to set the frame drop mode for SVC, unsigned int parameter. The valid values are constants of the AOM_SVC_FRAME_DROP_MODE enum: AOM_LAYER_DROP or AOM_FULL_SUPERFRAME_DROP.
Definition aomcx.h:1558
@ AV1E_SET_SVC_PARAMS
Codec control function to set SVC parameters, aom_svc_params_t* parameter.
Definition aomcx.h:1298
@ AV1E_SET_ENABLE_FILTER_INTRA
Codec control function to turn on / off filter intra usage at sequence level, int parameter.
Definition aomcx.h:1070
@ AV1E_SET_ENABLE_PALETTE
Codec control function to turn on/off palette mode, int parameter.
Definition aomcx.h:1120
@ AV1E_SET_ENABLE_CFL_INTRA
Codec control function to turn on / off CFL uv intra mode usage, int parameter.
Definition aomcx.h:1099
@ AOME_SET_MAX_INTRA_BITRATE_PCT
Codec control function to set max data rate for intra frames, unsigned int parameter.
Definition aomcx.h:312
@ AV1E_SET_ERROR_RESILIENT_MODE
Codec control function to enable error_resilient_mode, int parameter.
Definition aomcx.h:452
@ AV1E_SET_ENABLE_OBMC
Codec control function to predict with OBMC mode, unsigned int parameter.
Definition aomcx.h:708
@ AV1E_SET_AUTO_TILES
Codec control to set auto tiling, unsigned int parameter. Value of 1 means encoder will set number of tile_columns and tile_rows, based on the number of threads and resolution.
Definition aomcx.h:1566
@ AV1E_SET_LOOPFILTER_CONTROL
Codec control to control loop filter.
Definition aomcx.h:1424
@ AOME_SET_SCALEMODE
Codec control function to set encoder scaling mode for the next frame to be coded, aom_scaling_mode_t* parameter.
Definition aomcx.h:197
@ AV1E_SET_ENABLE_ORDER_HINT
Codec control function to turn on / off frame order hint (int parameter). Affects: joint compound mode, motion field motion vector, ref frame sign bias.
Definition aomcx.h:876
@ AV1E_SET_DELTAQ_MODE
Codec control function to set the delta q mode, unsigned int parameter.
Definition aomcx.h:1148
@ AV1E_SET_POSTENCODE_DROP_RTC
Codec control to enable post encode frame drop for RTC encoding, int parameter.
Definition aomcx.h:1582
@ AV1E_SET_ENABLE_GLOBAL_MOTION
Codec control function to turn on / off global motion usage for a sequence, int parameter.
Definition aomcx.h:1039
@ AOME_SET_CPUUSED
Codec control function to set encoder internal speed settings, int parameter.
Definition aomcx.h:220
@ AV1E_GET_HIGH_MOTION_CONTENT_SCREEN_RTC
Codec control to get the high motion content flag, used for screen content realtime (RTC) encoding, int* parameter.
Definition aomcx.h:1573
@ AV1E_SET_GF_CBR_BOOST_PCT
Boost percentage for Golden Frame in CBR mode, unsigned int parameter.
Definition aomcx.h:349
@ AV1E_SET_QUANTIZER_ONE_PASS
Codec control to set quantizer for the next frame, int parameter.
Definition aomcx.h:1507
@ AV1E_SET_MODE_COST_UPD_FREQ
Control to set frequency of the cost updates for mode, unsigned int parameter.
Definition aomcx.h:1261
@ AV1E_SET_MAX_CONSEC_FRAME_DROP_MS_CBR
Codec control to set the maximum number of consecutive frame drops, in units of time (milliseconds), allowed for the frame dropper in 1 pass CBR mode, int parameter.
Definition aomcx.h:1588
@ AV1_GET_NEW_FRAME_IMAGE
Codec control function to get a pointer to the new frame.
Definition aom.h:70
const char * aom_codec_iface_name(aom_codec_iface_t *iface)
Return the name for a given interface.
enum aom_bit_depth aom_bit_depth_t
Bit depth for codec. This enumeration determines the bit depth of the codec.
aom_codec_err_t aom_codec_control(aom_codec_ctx_t *ctx, int ctrl_id,...)
Algorithm Control.
long aom_codec_flags_t
Initialization-time Feature Enabling.
Definition aom_codec.h:232
struct aom_codec_ctx aom_codec_ctx_t
Codec context structure.
const struct aom_codec_iface aom_codec_iface_t
Codec interface structure.
Definition aom_codec.h:271
aom_codec_err_t aom_codec_destroy(aom_codec_ctx_t *ctx)
Destroy a codec instance.
const char * aom_codec_err_to_string(aom_codec_err_t err)
Convert error number to printable string.
aom_codec_err_t
Algorithm return codes.
Definition aom_codec.h:155
#define AOM_CODEC_CONTROL_TYPECHECKED(ctx, id, data)
aom_codec_control wrapper macro (adds type-checking, less flexible)
Definition aom_codec.h:542
const void * aom_codec_iter_t
Iterator.
Definition aom_codec.h:305
#define AOM_FRAME_IS_KEY
Definition aom_codec.h:288
@ AOM_BITS_8
Definition aom_codec.h:336
@ AOM_BITS_10
Definition aom_codec.h:337
@ AOM_CODEC_INVALID_PARAM
An application-supplied parameter is not valid.
Definition aom_codec.h:200
@ AOM_CODEC_MEM_ERROR
Memory operation failed.
Definition aom_codec.h:163
@ AOM_CODEC_OK
Operation completed without error.
Definition aom_codec.h:157
aom_codec_err_t aom_codec_decode(aom_codec_ctx_t *ctx, const uint8_t *data, size_t data_sz, void *user_priv)
Decode data.
#define aom_codec_dec_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_dec_init_ver()
Definition aom_decoder.h:129
#define AOM_USAGE_GOOD_QUALITY
usage parameter analogous to AV1 GOOD QUALITY mode.
Definition aom_encoder.h:1016
const aom_codec_cx_pkt_t * aom_codec_get_cx_data(aom_codec_ctx_t *ctx, aom_codec_iter_t *iter)
Encoded data iterator.
struct aom_codec_cx_pkt aom_codec_cx_pkt_t
Encoder output packet.
aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img, aom_codec_pts_t pts, unsigned long duration, aom_enc_frame_flags_t flags)
Encode a frame.
#define aom_codec_enc_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_enc_init_ver()
Definition aom_encoder.h:945
aom_codec_err_t aom_codec_enc_config_default(aom_codec_iface_t *iface, aom_codec_enc_cfg_t *cfg, unsigned int usage)
Get the default configuration for a usage.
struct aom_codec_enc_cfg aom_codec_enc_cfg_t
Encoder configuration structure.
#define AOM_USAGE_REALTIME
usage parameter analogous to AV1 REALTIME mode.
Definition aom_encoder.h:1018
#define AOM_CODEC_USE_HIGHBITDEPTH
Definition aom_encoder.h:80
#define AOM_CODEC_USE_PSNR
Initialization-time Feature Enabling.
Definition aom_encoder.h:79
@ AOM_CBR
Definition aom_encoder.h:187
@ AOM_KF_AUTO
Definition aom_encoder.h:202
@ AOM_CODEC_PSNR_PKT
Definition aom_encoder.h:113
@ AOM_CODEC_CX_FRAME_PKT
Definition aom_encoder.h:110
unsigned int rows
Definition aomcx.h:1679
unsigned int cols
Definition aomcx.h:1680
unsigned char * active_map
specify an on (1) or off (0) each 16x16 region within a frame
Definition aomcx.h:1678
size_t sz
Definition aom_encoder.h:127
enum aom_codec_cx_pkt_kind kind
Definition aom_encoder.h:123
double psnr[4]
Definition aom_encoder.h:145
union aom_codec_cx_pkt::(anonymous union) data
aom_codec_frame_flags_t flags
Definition aom_encoder.h:132
struct aom_codec_cx_pkt::(anonymous union)::(anonymous struct) frame
void * buf
Definition aom_encoder.h:126
unsigned int g_input_bit_depth
Bit-depth of the input frames.
Definition aom_encoder.h:477
unsigned int rc_dropframe_thresh
Temporal resampling configuration, if supported by the codec.
Definition aom_encoder.h:542
struct aom_rational g_timebase
Stream timebase units.
Definition aom_encoder.h:491
unsigned int g_usage
Algorithm specific "usage" value.
Definition aom_encoder.h:401
unsigned int rc_buf_sz
Decoder Buffer Size.
Definition aom_encoder.h:707
unsigned int g_h
Height of the frame.
Definition aom_encoder.h:437
enum aom_kf_mode kf_mode
Keyframe placement mode.
Definition aom_encoder.h:770
enum aom_rc_mode rc_end_usage
Rate control algorithm to use.
Definition aom_encoder.h:625
unsigned int g_threads
Maximum number of threads to use.
Definition aom_encoder.h:409
unsigned int kf_min_dist
Keyframe minimum interval.
Definition aom_encoder.h:779
unsigned int g_lag_in_frames
Allow lagged encoding.
Definition aom_encoder.h:520
unsigned int rc_buf_initial_sz
Decoder Buffer Initial Size.
Definition aom_encoder.h:716
unsigned int g_profile
Bitstream profile to use.
Definition aom_encoder.h:419
aom_bit_depth_t g_bit_depth
Bit-depth of the codec.
Definition aom_encoder.h:469
unsigned int g_w
Width of the frame.
Definition aom_encoder.h:428
unsigned int rc_undershoot_pct
Rate control adaptation undershoot control.
Definition aom_encoder.h:683
unsigned int kf_max_dist
Keyframe maximum interval.
Definition aom_encoder.h:788
aom_codec_er_flags_t g_error_resilient
Enable error resilient modes.
Definition aom_encoder.h:499
unsigned int rc_max_quantizer
Maximum (Worst Quality) Quantizer.
Definition aom_encoder.h:670
unsigned int rc_buf_optimal_sz
Decoder Buffer Optimal Size.
Definition aom_encoder.h:725
unsigned int rc_min_quantizer
Minimum (Best Quality) Quantizer.
Definition aom_encoder.h:660
unsigned int rc_target_bitrate
Target data rate.
Definition aom_encoder.h:646
unsigned int rc_resize_mode
Mode for spatial resampling, if supported by the codec.
Definition aom_encoder.h:551
unsigned int rc_overshoot_pct
Rate control adaptation overshoot control.
Definition aom_encoder.h:692
aom_img_fmt_t fmt
Definition aom_image.h:183
unsigned int d_w
Definition aom_image.h:197
unsigned int d_h
Definition aom_image.h:198
int num
Definition aom_encoder.h:165
int den
Definition aom_encoder.h:166
unsigned int cols
Definition aomcx.h:1660
int delta_lf[8]
Definition aomcx.h:1662
int ref_frame[8]
Definition aomcx.h:1664
unsigned int rows
Definition aomcx.h:1659
unsigned char * roi_map
Definition aomcx.h:1658
int delta_q[8]
Definition aomcx.h:1661
uint8_t enabled
Definition aomcx.h:1656
int skip[8]
Definition aomcx.h:1663
aom image scaling mode
Definition aomcx.h:1688
int temporal_layer_id
Definition aomcx.h:1786
int spatial_layer_id
Definition aomcx.h:1785
int max_quantizers[32]
Definition aomcx.h:1810
int number_spatial_layers
Definition aomcx.h:1802
int layer_target_bitrate[32]
Definition aomcx.h:1815
int framerate_factor[8]
Definition aomcx.h:1817
int min_quantizers[32]
Definition aomcx.h:1811
int scaling_factor_den[4]
Definition aomcx.h:1813
int number_temporal_layers
Definition aomcx.h:1809
int scaling_factor_num[4]
Definition aomcx.h:1812
int use_comp_pred[3]
Definition aomcx.h:1847
int reference[7]
Definition aomcx.h:1837
int refresh[8]
Definition aomcx.h:1840
int ref_idx[7]
Definition aomcx.h:1839