// AOMedia AV1 Codec
// svc_encoder_rtc
/*
 * Copyright (c) 2019, Alliance for Open Media. All rights reserved.
 *
 * This source code is subject to the terms of the BSD 2 Clause License and
 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
 * was not distributed with this source code in the LICENSE file, you can
 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
 * Media Patent License 1.0 was not distributed with this source code in the
 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
 */

// This is an example demonstrating how to implement a multi-layer AOM
// encoding scheme for RTC video applications.

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <memory>

#include "config/aom_config.h"

#if CONFIG_AV1_DECODER
#include "aom/aom_decoder.h"
#endif
#include "aom/aom_encoder.h"
#include "aom/aomcx.h"
#include "common/args.h"
#include "common/tools_common.h"
#include "common/video_writer.h"
#include "examples/encoder_util.h"
#include "aom_ports/aom_timer.h"
#include "av1/ratectrl_rtc.h"

38#define OPTION_BUFFER_SIZE 1024
39
40typedef struct {
41 const char *output_filename;
42 char options[OPTION_BUFFER_SIZE];
43 struct AvxInputContext input_ctx;
44 int speed;
45 int aq_mode;
46 int layering_mode;
47 int output_obu;
48 int decode;
49 int tune_content;
50 int show_psnr;
51 bool use_external_rc;
52} AppInput;
53
// Kinds of per-layer option lists accepted by
// parse_layer_options_from_string(). The enumerator values index
// option_min_values[] / option_max_values[].
typedef enum {
  QUANTIZER = 0,
  BITRATE,
  SCALE_FACTOR,
  AUTO_ALT_REF,
  ALL_OPTION_TYPES  // Number of option types; not a valid option itself.
} LAYER_OPTION_TYPE;

62static const arg_def_t outputfile =
63 ARG_DEF("o", "output", 1, "Output filename");
64static const arg_def_t frames_arg =
65 ARG_DEF("f", "frames", 1, "Number of frames to encode");
66static const arg_def_t threads_arg =
67 ARG_DEF("th", "threads", 1, "Number of threads to use");
68static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "Source width");
69static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "Source height");
70static const arg_def_t timebase_arg =
71 ARG_DEF("t", "timebase", 1, "Timebase (num/den)");
72static const arg_def_t bitrate_arg = ARG_DEF(
73 "b", "target-bitrate", 1, "Encoding bitrate, in kilobits per second");
74static const arg_def_t spatial_layers_arg =
75 ARG_DEF("sl", "spatial-layers", 1, "Number of spatial SVC layers");
76static const arg_def_t temporal_layers_arg =
77 ARG_DEF("tl", "temporal-layers", 1, "Number of temporal SVC layers");
78static const arg_def_t layering_mode_arg =
79 ARG_DEF("lm", "layering-mode", 1, "Temporal layering scheme.");
80static const arg_def_t kf_dist_arg =
81 ARG_DEF("k", "kf-dist", 1, "Number of frames between keyframes");
82static const arg_def_t scale_factors_arg =
83 ARG_DEF("r", "scale-factors", 1, "Scale factors (lowest to highest layer)");
84static const arg_def_t min_q_arg =
85 ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
86static const arg_def_t max_q_arg =
87 ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
88static const arg_def_t speed_arg =
89 ARG_DEF("sp", "speed", 1, "Speed configuration");
90static const arg_def_t aqmode_arg =
91 ARG_DEF("aq", "aqmode", 1, "AQ mode off/on");
92static const arg_def_t bitrates_arg =
93 ARG_DEF("bl", "bitrates", 1,
94 "Bitrates[spatial_layer * num_temporal_layer + temporal_layer]");
95static const arg_def_t dropframe_thresh_arg =
96 ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)");
97static const arg_def_t error_resilient_arg =
98 ARG_DEF(NULL, "error-resilient", 1, "Error resilient flag");
99static const arg_def_t output_obu_arg =
100 ARG_DEF(NULL, "output-obu", 1,
101 "Write OBUs when set to 1. Otherwise write IVF files.");
102static const arg_def_t test_decode_arg =
103 ARG_DEF(NULL, "test-decode", 1,
104 "Attempt to test decoding the output when set to 1. Default is 1.");
105static const arg_def_t psnr_arg =
106 ARG_DEF(NULL, "psnr", -1, "Show PSNR in status line.");
107static const arg_def_t ext_rc_arg =
108 ARG_DEF(NULL, "use-ext-rc", 0, "Use external rate control.");
109static const struct arg_enum_list tune_content_enum[] = {
110 { "default", AOM_CONTENT_DEFAULT },
111 { "screen", AOM_CONTENT_SCREEN },
112 { "film", AOM_CONTENT_FILM },
113 { NULL, 0 }
114};
115static const arg_def_t tune_content_arg = ARG_DEF_ENUM(
116 NULL, "tune-content", 1, "Tune content type", tune_content_enum);
117
#if CONFIG_AV1_HIGHBITDEPTH
// Named values accepted by --bit-depth (high-bitdepth builds only).
static const struct arg_enum_list bitdepth_enum[] = { { "8", AOM_BITS_8 },
                                                      { "10", AOM_BITS_10 },
                                                      { NULL, 0 } };

static const arg_def_t bitdepth_arg = ARG_DEF_ENUM(
    "d", "bit-depth", 1, "Bit depth for codec 8 or 10. ", bitdepth_enum);
#endif  // CONFIG_AV1_HIGHBITDEPTH

127static const arg_def_t *svc_args[] = {
128 &frames_arg, &outputfile, &width_arg,
129 &height_arg, &timebase_arg, &bitrate_arg,
130 &spatial_layers_arg, &kf_dist_arg, &scale_factors_arg,
131 &min_q_arg, &max_q_arg, &temporal_layers_arg,
132 &layering_mode_arg, &threads_arg, &aqmode_arg,
133#if CONFIG_AV1_HIGHBITDEPTH
134 &bitdepth_arg,
135#endif
136 &speed_arg, &bitrates_arg, &dropframe_thresh_arg,
137 &error_resilient_arg, &output_obu_arg, &test_decode_arg,
138 &tune_content_arg, &psnr_arg, NULL,
139};
140
// Zero-fills the object Dest. Dest must be an actual object or array (not a
// pointer to one), because the size is taken with sizeof(Dest).
#define zero(Dest) memset(&(Dest), 0, sizeof(Dest))

// Program name printed in the usage message by usage_exit().
static const char *exec_name;

145void usage_exit(void) {
146 fprintf(stderr, "Usage: %s <options> input_filename -o output_filename\n",
147 exec_name);
148 fprintf(stderr, "Options:\n");
149 arg_show_usage(stderr, svc_args);
150 exit(EXIT_FAILURE);
151}
152
// Returns 1 if the first four bytes of `detect` are the Y4M signature
// "YUV4", otherwise 0.
static int file_is_y4m(const char detect[4]) {
  return memcmp(detect, "YUV4", 4) == 0;
}

// Returns 1 if the first four bytes of `detect` are the IVF signature
// "DKIF", otherwise 0.
static int fourcc_is_ivf(const char detect[4]) {
  return memcmp(detect, "DKIF", 4) == 0;
}

164static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX,
165 1 };
166
167static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 };
168
169static void open_input_file(struct AvxInputContext *input,
171 /* Parse certain options from the input file, if possible */
172 input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb")
173 : set_binary_mode(stdin);
174
175 if (!input->file) fatal("Failed to open input file");
176
177 if (!fseeko(input->file, 0, SEEK_END)) {
178 /* Input file is seekable. Figure out how long it is, so we can get
179 * progress info.
180 */
181 input->length = ftello(input->file);
182 rewind(input->file);
183 }
184
185 /* Default to 1:1 pixel aspect ratio. */
186 input->pixel_aspect_ratio.numerator = 1;
187 input->pixel_aspect_ratio.denominator = 1;
188
189 /* For RAW input sources, these bytes will applied on the first frame
190 * in read_frame().
191 */
192 input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file);
193 input->detect.position = 0;
194
195 if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) {
196 if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, csp,
197 input->only_i420) >= 0) {
198 input->file_type = FILE_TYPE_Y4M;
199 input->width = input->y4m.pic_w;
200 input->height = input->y4m.pic_h;
201 input->pixel_aspect_ratio.numerator = input->y4m.par_n;
202 input->pixel_aspect_ratio.denominator = input->y4m.par_d;
203 input->framerate.numerator = input->y4m.fps_n;
204 input->framerate.denominator = input->y4m.fps_d;
205 input->fmt = input->y4m.aom_fmt;
206 input->bit_depth = static_cast<aom_bit_depth_t>(input->y4m.bit_depth);
207 } else {
208 fatal("Unsupported Y4M stream.");
209 }
210 } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) {
211 fatal("IVF is not supported as input.");
212 } else {
213 input->file_type = FILE_TYPE_RAW;
214 }
215}
216
217static aom_codec_err_t extract_option(LAYER_OPTION_TYPE type, char *input,
218 int *value0, int *value1) {
219 if (type == SCALE_FACTOR) {
220 *value0 = (int)strtol(input, &input, 10);
221 if (*input++ != '/') return AOM_CODEC_INVALID_PARAM;
222 *value1 = (int)strtol(input, &input, 10);
223
224 if (*value0 < option_min_values[SCALE_FACTOR] ||
225 *value1 < option_min_values[SCALE_FACTOR] ||
226 *value0 > option_max_values[SCALE_FACTOR] ||
227 *value1 > option_max_values[SCALE_FACTOR] ||
228 *value0 > *value1) // num shouldn't be greater than den
230 } else {
231 *value0 = atoi(input);
232 if (*value0 < option_min_values[type] || *value0 > option_max_values[type])
234 }
235 return AOM_CODEC_OK;
236}
237
238static aom_codec_err_t parse_layer_options_from_string(
239 aom_svc_params_t *svc_params, LAYER_OPTION_TYPE type, const char *input,
240 int *option0, int *option1) {
242 char *input_string;
243 char *token;
244 const char *delim = ",";
245 int num_layers = svc_params->number_spatial_layers;
246 int i = 0;
247
248 if (type == BITRATE)
249 num_layers =
250 svc_params->number_spatial_layers * svc_params->number_temporal_layers;
251
252 if (input == NULL || option0 == NULL ||
253 (option1 == NULL && type == SCALE_FACTOR))
255
256 const size_t input_length = strlen(input);
257 input_string = reinterpret_cast<char *>(malloc(input_length + 1));
258 if (input_string == NULL) return AOM_CODEC_MEM_ERROR;
259 memcpy(input_string, input, input_length + 1);
260 token = strtok(input_string, delim); // NOLINT
261 for (i = 0; i < num_layers; ++i) {
262 if (token != NULL) {
263 res = extract_option(type, token, option0 + i, option1 + i);
264 if (res != AOM_CODEC_OK) break;
265 token = strtok(NULL, delim); // NOLINT
266 } else {
268 break;
269 }
270 }
271 free(input_string);
272 return res;
273}
274
275static void parse_command_line(int argc, const char **argv_,
276 AppInput *app_input,
277 aom_svc_params_t *svc_params,
278 aom_codec_enc_cfg_t *enc_cfg) {
279 struct arg arg;
280 char **argv = NULL;
281 char **argi = NULL;
282 char **argj = NULL;
283 char string_options[1024] = { 0 };
284
285 // Default settings
286 svc_params->number_spatial_layers = 1;
287 svc_params->number_temporal_layers = 1;
288 app_input->layering_mode = 0;
289 app_input->output_obu = 0;
290 app_input->decode = 1;
291 enc_cfg->g_threads = 1;
292 enc_cfg->rc_end_usage = AOM_CBR;
293
294 // process command line options
295 argv = argv_dup(argc - 1, argv_ + 1);
296 if (!argv) {
297 fprintf(stderr, "Error allocating argument list\n");
298 exit(EXIT_FAILURE);
299 }
300 for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
301 arg.argv_step = 1;
302
303 if (arg_match(&arg, &outputfile, argi)) {
304 app_input->output_filename = arg.val;
305 } else if (arg_match(&arg, &width_arg, argi)) {
306 enc_cfg->g_w = arg_parse_uint(&arg);
307 } else if (arg_match(&arg, &height_arg, argi)) {
308 enc_cfg->g_h = arg_parse_uint(&arg);
309 } else if (arg_match(&arg, &timebase_arg, argi)) {
310 enc_cfg->g_timebase = arg_parse_rational(&arg);
311 } else if (arg_match(&arg, &bitrate_arg, argi)) {
312 enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
313 } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
314 svc_params->number_spatial_layers = arg_parse_uint(&arg);
315 } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
316 svc_params->number_temporal_layers = arg_parse_uint(&arg);
317 } else if (arg_match(&arg, &speed_arg, argi)) {
318 app_input->speed = arg_parse_uint(&arg);
319 if (app_input->speed > 11) {
320 aom_tools_warn("Mapping speed %d to speed 11.\n", app_input->speed);
321 }
322 } else if (arg_match(&arg, &aqmode_arg, argi)) {
323 app_input->aq_mode = arg_parse_uint(&arg);
324 } else if (arg_match(&arg, &threads_arg, argi)) {
325 enc_cfg->g_threads = arg_parse_uint(&arg);
326 } else if (arg_match(&arg, &layering_mode_arg, argi)) {
327 app_input->layering_mode = arg_parse_int(&arg);
328 } else if (arg_match(&arg, &kf_dist_arg, argi)) {
329 enc_cfg->kf_min_dist = arg_parse_uint(&arg);
330 enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
331 } else if (arg_match(&arg, &scale_factors_arg, argi)) {
332 aom_codec_err_t res = parse_layer_options_from_string(
333 svc_params, SCALE_FACTOR, arg.val, svc_params->scaling_factor_num,
334 svc_params->scaling_factor_den);
335 if (res != AOM_CODEC_OK) {
336 die("Failed to parse scale factors: %s\n",
338 }
339 } else if (arg_match(&arg, &min_q_arg, argi)) {
340 enc_cfg->rc_min_quantizer = arg_parse_uint(&arg);
341 } else if (arg_match(&arg, &max_q_arg, argi)) {
342 enc_cfg->rc_max_quantizer = arg_parse_uint(&arg);
343#if CONFIG_AV1_HIGHBITDEPTH
344 } else if (arg_match(&arg, &bitdepth_arg, argi)) {
345 enc_cfg->g_bit_depth =
346 static_cast<aom_bit_depth_t>(arg_parse_enum_or_int(&arg));
347 switch (enc_cfg->g_bit_depth) {
348 case AOM_BITS_8:
349 enc_cfg->g_input_bit_depth = 8;
350 enc_cfg->g_profile = 0;
351 break;
352 case AOM_BITS_10:
353 enc_cfg->g_input_bit_depth = 10;
354 enc_cfg->g_profile = 0;
355 break;
356 default:
357 die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
358 }
359#endif // CONFIG_VP9_HIGHBITDEPTH
360 } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) {
361 enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg);
362 } else if (arg_match(&arg, &error_resilient_arg, argi)) {
363 enc_cfg->g_error_resilient = arg_parse_uint(&arg);
364 if (enc_cfg->g_error_resilient != 0 && enc_cfg->g_error_resilient != 1)
365 die("Invalid value for error resilient (0, 1): %d.",
366 enc_cfg->g_error_resilient);
367 } else if (arg_match(&arg, &output_obu_arg, argi)) {
368 app_input->output_obu = arg_parse_uint(&arg);
369 if (app_input->output_obu != 0 && app_input->output_obu != 1)
370 die("Invalid value for obu output flag (0, 1): %d.",
371 app_input->output_obu);
372 } else if (arg_match(&arg, &test_decode_arg, argi)) {
373 app_input->decode = arg_parse_uint(&arg);
374 if (app_input->decode != 0 && app_input->decode != 1)
375 die("Invalid value for test decode flag (0, 1): %d.",
376 app_input->decode);
377 } else if (arg_match(&arg, &tune_content_arg, argi)) {
378 app_input->tune_content = arg_parse_enum_or_int(&arg);
379 printf("tune content %d\n", app_input->tune_content);
380 } else if (arg_match(&arg, &psnr_arg, argi)) {
381 app_input->show_psnr = 1;
382 } else if (arg_match(&arg, &ext_rc_arg, argi)) {
383 app_input->use_external_rc = true;
384 } else {
385 ++argj;
386 }
387 }
388
389 // Total bitrate needs to be parsed after the number of layers.
390 for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
391 arg.argv_step = 1;
392 if (arg_match(&arg, &bitrates_arg, argi)) {
393 aom_codec_err_t res = parse_layer_options_from_string(
394 svc_params, BITRATE, arg.val, svc_params->layer_target_bitrate, NULL);
395 if (res != AOM_CODEC_OK) {
396 die("Failed to parse bitrates: %s\n", aom_codec_err_to_string(res));
397 }
398 } else {
399 ++argj;
400 }
401 }
402
403 // There will be a space in front of the string options
404 if (strlen(string_options) > 0)
405 strncpy(app_input->options, string_options, OPTION_BUFFER_SIZE);
406
407 // Check for unrecognized options
408 for (argi = argv; *argi; ++argi)
409 if (argi[0][0] == '-' && strlen(argi[0]) > 1)
410 die("Error: Unrecognized option %s\n", *argi);
411
412 if (argv[0] == NULL) {
413 usage_exit();
414 }
415
416 app_input->input_ctx.filename = argv[0];
417 free(argv);
418
419 open_input_file(&app_input->input_ctx, AOM_CSP_UNKNOWN);
420 if (app_input->input_ctx.file_type == FILE_TYPE_Y4M) {
421 enc_cfg->g_w = app_input->input_ctx.width;
422 enc_cfg->g_h = app_input->input_ctx.height;
423 }
424
425 if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
426 enc_cfg->g_h % 2)
427 die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
428
429 printf(
430 "Codec %s\n"
431 "layers: %d\n"
432 "width %u, height: %u\n"
433 "num: %d, den: %d, bitrate: %u\n"
434 "gop size: %u\n",
436 svc_params->number_spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
437 enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
438 enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
439}
440
// Number of temporal layers for each layering mode (index = mode, 0..11).
static int mode_to_num_temporal_layers[12] = {
  1, 2, 3, 3, 2, 1, 1, 3, 3, 3, 3, 3,
};
// Number of spatial layers for each layering mode (index = mode, 0..11).
static int mode_to_num_spatial_layers[12] = {
  1, 1, 1, 1, 1, 2, 3, 2, 3, 3, 3, 3,
};

448// For rate control encoding stats.
449struct RateControlMetrics {
450 // Number of input frames per layer.
451 int layer_input_frames[AOM_MAX_TS_LAYERS];
452 // Number of encoded non-key frames per layer.
453 int layer_enc_frames[AOM_MAX_TS_LAYERS];
454 // Framerate per layer layer (cumulative).
455 double layer_framerate[AOM_MAX_TS_LAYERS];
456 // Target average frame size per layer (per-frame-bandwidth per layer).
457 double layer_pfb[AOM_MAX_LAYERS];
458 // Actual average frame size per layer.
459 double layer_avg_frame_size[AOM_MAX_LAYERS];
460 // Average rate mismatch per layer (|target - actual| / target).
461 double layer_avg_rate_mismatch[AOM_MAX_LAYERS];
462 // Actual encoding bitrate per layer (cumulative across temporal layers).
463 double layer_encoding_bitrate[AOM_MAX_LAYERS];
464 // Average of the short-time encoder actual bitrate.
465 // TODO(marpan): Should we add these short-time stats for each layer?
466 double avg_st_encoding_bitrate;
467 // Variance of the short-time encoder actual bitrate.
468 double variance_st_encoding_bitrate;
469 // Window (number of frames) for computing short-timee encoding bitrate.
470 int window_size;
471 // Number of window measurements.
472 int window_count;
473 int layer_target_bitrate[AOM_MAX_LAYERS];
474};
475
// Number of reference buffer slots (size of the refresh[] array).
static const int REF_FRAMES = 8;

// Number of inter references per frame (size of ref_idx[] / reference[]).
static const int INTER_REFS_PER_FRAME = 7;

// Reference frames used in this example encoder. The values index the
// ref_idx[] and reference[] arrays of the reference frame configuration.
enum {
  SVC_LAST_FRAME = 0,
  SVC_LAST2_FRAME,
  SVC_LAST3_FRAME,
  SVC_GOLDEN_FRAME,
  SVC_BWDREF_FRAME,
  SVC_ALTREF2_FRAME,
  SVC_ALTREF_FRAME
};

491static int read_frame(struct AvxInputContext *input_ctx, aom_image_t *img) {
492 FILE *f = input_ctx->file;
493 y4m_input *y4m = &input_ctx->y4m;
494 int shortread = 0;
495
496 if (input_ctx->file_type == FILE_TYPE_Y4M) {
497 if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0;
498 } else {
499 shortread = read_yuv_frame(input_ctx, img);
500 }
501
502 return !shortread;
503}
504
505static void close_input_file(struct AvxInputContext *input) {
506 fclose(input->file);
507 if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m);
508}
509
510// Note: these rate control metrics assume only 1 key frame in the
511// sequence (i.e., first frame only). So for temporal pattern# 7
512// (which has key frame for every frame on base layer), the metrics
513// computation will be off/wrong.
514// TODO(marpan): Update these metrics to account for multiple key frames
515// in the stream.
516static void set_rate_control_metrics(struct RateControlMetrics *rc,
517 double framerate, int ss_number_layers,
518 int ts_number_layers) {
519 int ts_rate_decimator[AOM_MAX_TS_LAYERS] = { 1 };
520 ts_rate_decimator[0] = 1;
521 if (ts_number_layers == 2) {
522 ts_rate_decimator[0] = 2;
523 ts_rate_decimator[1] = 1;
524 }
525 if (ts_number_layers == 3) {
526 ts_rate_decimator[0] = 4;
527 ts_rate_decimator[1] = 2;
528 ts_rate_decimator[2] = 1;
529 }
530 // Set the layer (cumulative) framerate and the target layer (non-cumulative)
531 // per-frame-bandwidth, for the rate control encoding stats below.
532 for (int sl = 0; sl < ss_number_layers; ++sl) {
533 int i = sl * ts_number_layers;
534 rc->layer_framerate[0] = framerate / ts_rate_decimator[0];
535 rc->layer_pfb[i] =
536 1000.0 * rc->layer_target_bitrate[i] / rc->layer_framerate[0];
537 for (int tl = 0; tl < ts_number_layers; ++tl) {
538 i = sl * ts_number_layers + tl;
539 if (tl > 0) {
540 rc->layer_framerate[tl] = framerate / ts_rate_decimator[tl];
541 rc->layer_pfb[i] =
542 1000.0 *
543 (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
544 (rc->layer_framerate[tl] - rc->layer_framerate[tl - 1]);
545 }
546 rc->layer_input_frames[tl] = 0;
547 rc->layer_enc_frames[tl] = 0;
548 rc->layer_encoding_bitrate[i] = 0.0;
549 rc->layer_avg_frame_size[i] = 0.0;
550 rc->layer_avg_rate_mismatch[i] = 0.0;
551 }
552 }
553 rc->window_count = 0;
554 rc->window_size = 15;
555 rc->avg_st_encoding_bitrate = 0.0;
556 rc->variance_st_encoding_bitrate = 0.0;
557}
558
559static void printout_rate_control_summary(struct RateControlMetrics *rc,
560 int frame_cnt, int ss_number_layers,
561 int ts_number_layers) {
562 int tot_num_frames = 0;
563 double perc_fluctuation = 0.0;
564 printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
565 printf("Rate control layer stats for %d layer(s):\n\n", ts_number_layers);
566 for (int sl = 0; sl < ss_number_layers; ++sl) {
567 tot_num_frames = 0;
568 for (int tl = 0; tl < ts_number_layers; ++tl) {
569 int i = sl * ts_number_layers + tl;
570 const int num_dropped =
571 tl > 0 ? rc->layer_input_frames[tl] - rc->layer_enc_frames[tl]
572 : rc->layer_input_frames[tl] - rc->layer_enc_frames[tl] - 1;
573 tot_num_frames += rc->layer_input_frames[tl];
574 rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[tl] *
575 rc->layer_encoding_bitrate[i] /
576 tot_num_frames;
577 rc->layer_avg_frame_size[i] =
578 rc->layer_avg_frame_size[i] / rc->layer_enc_frames[tl];
579 rc->layer_avg_rate_mismatch[i] =
580 100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[tl];
581 printf("For layer#: %d %d \n", sl, tl);
582 printf("Bitrate (target vs actual): %d %f\n", rc->layer_target_bitrate[i],
583 rc->layer_encoding_bitrate[i]);
584 printf("Average frame size (target vs actual): %f %f\n", rc->layer_pfb[i],
585 rc->layer_avg_frame_size[i]);
586 printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[i]);
587 printf(
588 "Number of input frames, encoded (non-key) frames, "
589 "and perc dropped frames: %d %d %f\n",
590 rc->layer_input_frames[tl], rc->layer_enc_frames[tl],
591 100.0 * num_dropped / rc->layer_input_frames[tl]);
592 printf("\n");
593 }
594 }
595 rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
596 rc->variance_st_encoding_bitrate =
597 rc->variance_st_encoding_bitrate / rc->window_count -
598 (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
599 perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
600 rc->avg_st_encoding_bitrate;
601 printf("Short-time stats, for window of %d frames:\n", rc->window_size);
602 printf("Average, rms-variance, and percent-fluct: %f %f %f\n",
603 rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
604 perc_fluctuation);
605 if (frame_cnt - 1 != tot_num_frames)
606 die("Error: Number of input frames not equal to output!\n");
607}
608
609// Layer pattern configuration.
610static void set_layer_pattern(
611 int layering_mode, int superframe_cnt, aom_svc_layer_id_t *layer_id,
612 aom_svc_ref_frame_config_t *ref_frame_config,
613 aom_svc_ref_frame_comp_pred_t *ref_frame_comp_pred, int *use_svc_control,
614 int spatial_layer_id, int is_key_frame, int ksvc_mode, int speed) {
615 // Setting this flag to 1 enables simplex example of
616 // RPS (Reference Picture Selection) for 1 layer.
617 int use_rps_example = 0;
618 int i;
619 int enable_longterm_temporal_ref = 1;
620 int shift = (layering_mode == 8) ? 2 : 0;
621 int simulcast_mode = (layering_mode == 11);
622 *use_svc_control = 1;
623 layer_id->spatial_layer_id = spatial_layer_id;
624 int lag_index = 0;
625 int base_count = superframe_cnt >> 2;
626 ref_frame_comp_pred->use_comp_pred[0] = 0; // GOLDEN_LAST
627 ref_frame_comp_pred->use_comp_pred[1] = 0; // LAST2_LAST
628 ref_frame_comp_pred->use_comp_pred[2] = 0; // ALTREF_LAST
629 // Set the reference map buffer idx for the 7 references:
630 // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
631 // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
632 for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i;
633 for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->reference[i] = 0;
634 for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
635
636 if (ksvc_mode) {
637 // Same pattern as case 9, but the reference strucutre will be constrained
638 // below.
639 layering_mode = 9;
640 }
641 switch (layering_mode) {
642 case 0:
643 if (use_rps_example == 0) {
644 // 1-layer: update LAST on every frame, reference LAST.
645 layer_id->temporal_layer_id = 0;
646 layer_id->spatial_layer_id = 0;
647 ref_frame_config->refresh[0] = 1;
648 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
649 } else {
650 // Pattern of 2 references (ALTREF and GOLDEN) trailing
651 // LAST by 4 and 8 frames, with some switching logic to
652 // sometimes only predict from the longer-term reference
653 //(golden here). This is simple example to test RPS
654 // (reference picture selection).
655 int last_idx = 0;
656 int last_idx_refresh = 0;
657 int gld_idx = 0;
658 int alt_ref_idx = 0;
659 int lag_alt = 4;
660 int lag_gld = 8;
661 layer_id->temporal_layer_id = 0;
662 layer_id->spatial_layer_id = 0;
663 int sh = 8; // slots 0 - 7.
664 // Moving index slot for last: 0 - (sh - 1)
665 if (superframe_cnt > 1) last_idx = (superframe_cnt - 1) % sh;
666 // Moving index for refresh of last: one ahead for next frame.
667 last_idx_refresh = superframe_cnt % sh;
668 // Moving index for gld_ref, lag behind current by lag_gld
669 if (superframe_cnt > lag_gld) gld_idx = (superframe_cnt - lag_gld) % sh;
670 // Moving index for alt_ref, lag behind LAST by lag_alt frames.
671 if (superframe_cnt > lag_alt)
672 alt_ref_idx = (superframe_cnt - lag_alt) % sh;
673 // Set the ref_idx.
674 // Default all references to slot for last.
675 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
676 ref_frame_config->ref_idx[i] = last_idx;
677 // Set the ref_idx for the relevant references.
678 ref_frame_config->ref_idx[SVC_LAST_FRAME] = last_idx;
679 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = last_idx_refresh;
680 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = gld_idx;
681 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = alt_ref_idx;
682 // Refresh this slot, which will become LAST on next frame.
683 ref_frame_config->refresh[last_idx_refresh] = 1;
684 // Reference LAST, ALTREF, and GOLDEN
685 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
686 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
687 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
688 // Switch to only GOLDEN every 300 frames.
689 if (superframe_cnt % 200 == 0 && superframe_cnt > 0) {
690 ref_frame_config->reference[SVC_LAST_FRAME] = 0;
691 ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
692 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
693 // Test if the long-term is LAST instead, this is just a renaming
694 // but its tests if encoder behaves the same, whether its
695 // LAST or GOLDEN.
696 if (superframe_cnt % 400 == 0 && superframe_cnt > 0) {
697 ref_frame_config->ref_idx[SVC_LAST_FRAME] = gld_idx;
698 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
699 ref_frame_config->reference[SVC_ALTREF_FRAME] = 0;
700 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
701 }
702 }
703 }
704 break;
705 case 1:
706 // 2-temporal layer.
707 // 1 3 5
708 // 0 2 4
709 // Keep golden fixed at slot 3.
710 base_count = superframe_cnt >> 1;
711 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
712 // Cyclically refresh slots 5, 6, 7, for lag alt ref.
713 lag_index = 5;
714 if (base_count > 0) {
715 lag_index = 5 + (base_count % 3);
716 if (superframe_cnt % 2 != 0) lag_index = 5 + ((base_count + 1) % 3);
717 }
718 // Set the altref slot to lag_index.
719 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
720 if (superframe_cnt % 2 == 0) {
721 layer_id->temporal_layer_id = 0;
722 // Update LAST on layer 0, reference LAST.
723 ref_frame_config->refresh[0] = 1;
724 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
725 // Refresh lag_index slot, needed for lagging golen.
726 ref_frame_config->refresh[lag_index] = 1;
727 // Refresh GOLDEN every x base layer frames.
728 if (base_count % 32 == 0) ref_frame_config->refresh[3] = 1;
729 } else {
730 layer_id->temporal_layer_id = 1;
731 // No updates on layer 1, reference LAST (TL0).
732 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
733 }
734 // Always reference golden and altref on TL0.
735 if (layer_id->temporal_layer_id == 0) {
736 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
737 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
738 }
739 break;
740 case 2:
741 // 3-temporal layer:
742 // 1 3 5 7
743 // 2 6
744 // 0 4 8
745 if (superframe_cnt % 4 == 0) {
746 // Base layer.
747 layer_id->temporal_layer_id = 0;
748 // Update LAST on layer 0, reference LAST.
749 ref_frame_config->refresh[0] = 1;
750 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
751 } else if ((superframe_cnt - 1) % 4 == 0) {
752 layer_id->temporal_layer_id = 2;
753 // First top layer: no updates, only reference LAST (TL0).
754 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
755 } else if ((superframe_cnt - 2) % 4 == 0) {
756 layer_id->temporal_layer_id = 1;
757 // Middle layer (TL1): update LAST2, only reference LAST (TL0).
758 ref_frame_config->refresh[1] = 1;
759 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
760 } else if ((superframe_cnt - 3) % 4 == 0) {
761 layer_id->temporal_layer_id = 2;
762 // Second top layer: no updates, only reference LAST.
763 // Set buffer idx for LAST to slot 1, since that was the slot
764 // updated in previous frame. So LAST is TL1 frame.
765 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
766 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
767 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
768 }
769 break;
770 case 3:
771 // 3 TL, same as above, except allow for predicting
772 // off 2 more references (GOLDEN and ALTREF), with
773 // GOLDEN updated periodically, and ALTREF lagging from
774 // LAST from ~4 frames. Both GOLDEN and ALTREF
775 // can only be updated on base temporal layer.
776
777 // Keep golden fixed at slot 3.
778 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
779 // Cyclically refresh slots 5, 6, 7, for lag altref.
780 lag_index = 5;
781 if (base_count > 0) {
782 lag_index = 5 + (base_count % 3);
783 if (superframe_cnt % 4 != 0) lag_index = 5 + ((base_count + 1) % 3);
784 }
785 // Set the altref slot to lag_index.
786 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = lag_index;
787 if (superframe_cnt % 4 == 0) {
788 // Base layer.
789 layer_id->temporal_layer_id = 0;
790 // Update LAST on layer 0, reference LAST.
791 ref_frame_config->refresh[0] = 1;
792 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
793 // Refresh GOLDEN every x ~10 base layer frames.
794 if (base_count % 10 == 0) ref_frame_config->refresh[3] = 1;
795 // Refresh lag_index slot, needed for lagging altref.
796 ref_frame_config->refresh[lag_index] = 1;
797 } else if ((superframe_cnt - 1) % 4 == 0) {
798 layer_id->temporal_layer_id = 2;
799 // First top layer: no updates, only reference LAST (TL0).
800 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
801 } else if ((superframe_cnt - 2) % 4 == 0) {
802 layer_id->temporal_layer_id = 1;
803 // Middle layer (TL1): update LAST2, only reference LAST (TL0).
804 ref_frame_config->refresh[1] = 1;
805 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
806 } else if ((superframe_cnt - 3) % 4 == 0) {
807 layer_id->temporal_layer_id = 2;
808 // Second top layer: no updates, only reference LAST.
809 // Set buffer idx for LAST to slot 1, since that was the slot
810 // updated in previous frame. So LAST is TL1 frame.
811 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
812 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 0;
813 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
814 }
815 // Every frame can reference GOLDEN AND ALTREF.
816 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
817 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
818 // Allow for compound prediction for LAST-ALTREF and LAST-GOLDEN.
819 if (speed >= 7) {
820 ref_frame_comp_pred->use_comp_pred[2] = 1;
821 ref_frame_comp_pred->use_comp_pred[0] = 1;
822 }
823 break;
824 case 4:
825 // 3-temporal layer: but middle layer updates GF, so 2nd TL2 will
826 // only reference GF (not LAST). Other frames only reference LAST.
827 // 1 3 5 7
828 // 2 6
829 // 0 4 8
830 if (superframe_cnt % 4 == 0) {
831 // Base layer.
832 layer_id->temporal_layer_id = 0;
833 // Update LAST on layer 0, only reference LAST.
834 ref_frame_config->refresh[0] = 1;
835 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
836 } else if ((superframe_cnt - 1) % 4 == 0) {
837 layer_id->temporal_layer_id = 2;
838 // First top layer: no updates, only reference LAST (TL0).
839 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
840 } else if ((superframe_cnt - 2) % 4 == 0) {
841 layer_id->temporal_layer_id = 1;
842 // Middle layer (TL1): update GF, only reference LAST (TL0).
843 ref_frame_config->refresh[3] = 1;
844 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
845 } else if ((superframe_cnt - 3) % 4 == 0) {
846 layer_id->temporal_layer_id = 2;
847 // Second top layer: no updates, only reference GF.
848 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
849 }
850 break;
851 case 5:
852 // 2 spatial layers, 1 temporal.
853 layer_id->temporal_layer_id = 0;
854 if (layer_id->spatial_layer_id == 0) {
855 // Reference LAST, update LAST.
856 ref_frame_config->refresh[0] = 1;
857 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
858 } else if (layer_id->spatial_layer_id == 1) {
859 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
860 // and GOLDEN to slot 0. Update slot 1 (LAST).
861 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
862 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 0;
863 ref_frame_config->refresh[1] = 1;
864 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
865 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
866 }
867 break;
868 case 6:
869 // 3 spatial layers, 1 temporal.
870 // Note for this case, we set the buffer idx for all references to be
871 // either LAST or GOLDEN, which are always valid references, since decoder
872 // will check if any of the 7 references is valid scale in
873 // valid_ref_frame_size().
874 layer_id->temporal_layer_id = 0;
875 if (layer_id->spatial_layer_id == 0) {
876 // Reference LAST, update LAST. Set all buffer_idx to 0.
877 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
878 ref_frame_config->ref_idx[i] = 0;
879 ref_frame_config->refresh[0] = 1;
880 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
881 } else if (layer_id->spatial_layer_id == 1) {
882 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1
883 // and GOLDEN (and all other refs) to slot 0.
884 // Update slot 1 (LAST).
885 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
886 ref_frame_config->ref_idx[i] = 0;
887 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
888 ref_frame_config->refresh[1] = 1;
889 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
890 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
891 } else if (layer_id->spatial_layer_id == 2) {
892 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2
893 // and GOLDEN (and all other refs) to slot 1.
894 // Update slot 2 (LAST).
895 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
896 ref_frame_config->ref_idx[i] = 1;
897 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
898 ref_frame_config->refresh[2] = 1;
899 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
900 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
901 // For 3 spatial layer case: allow for top spatial layer to use
902 // additional temporal reference. Update every 10 frames.
903 if (enable_longterm_temporal_ref) {
904 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
905 ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
906 if (base_count % 10 == 0)
907 ref_frame_config->refresh[REF_FRAMES - 1] = 1;
908 }
909 }
910 break;
911 case 7:
912 // 2 spatial and 3 temporal layer.
913 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
914 if (superframe_cnt % 4 == 0) {
915 // Base temporal layer
916 layer_id->temporal_layer_id = 0;
917 if (layer_id->spatial_layer_id == 0) {
918 // Reference LAST, update LAST
919 // Set all buffer_idx to 0
920 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
921 ref_frame_config->ref_idx[i] = 0;
922 ref_frame_config->refresh[0] = 1;
923 } else if (layer_id->spatial_layer_id == 1) {
924 // Reference LAST and GOLDEN.
925 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
926 ref_frame_config->ref_idx[i] = 0;
927 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
928 ref_frame_config->refresh[1] = 1;
929 }
930 } else if ((superframe_cnt - 1) % 4 == 0) {
931 // First top temporal enhancement layer.
932 layer_id->temporal_layer_id = 2;
933 if (layer_id->spatial_layer_id == 0) {
934 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
935 ref_frame_config->ref_idx[i] = 0;
936 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
937 ref_frame_config->refresh[3] = 1;
938 } else if (layer_id->spatial_layer_id == 1) {
939 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
940 // GOLDEN (and all other refs) to slot 3.
941 // No update.
942 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
943 ref_frame_config->ref_idx[i] = 3;
944 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
945 }
946 } else if ((superframe_cnt - 2) % 4 == 0) {
947 // Middle temporal enhancement layer.
948 layer_id->temporal_layer_id = 1;
949 if (layer_id->spatial_layer_id == 0) {
950 // Reference LAST.
951 // Set all buffer_idx to 0.
952 // Set GOLDEN to slot 5 and update slot 5.
953 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
954 ref_frame_config->ref_idx[i] = 0;
955 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
956 ref_frame_config->refresh[5 - shift] = 1;
957 } else if (layer_id->spatial_layer_id == 1) {
958 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
959 // GOLDEN (and all other refs) to slot 5.
960 // Set LAST3 to slot 6 and update slot 6.
961 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
962 ref_frame_config->ref_idx[i] = 5 - shift;
963 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
964 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
965 ref_frame_config->refresh[6 - shift] = 1;
966 }
967 } else if ((superframe_cnt - 3) % 4 == 0) {
968 // Second top temporal enhancement layer.
969 layer_id->temporal_layer_id = 2;
970 if (layer_id->spatial_layer_id == 0) {
971 // Set LAST to slot 5 and reference LAST.
972 // Set GOLDEN to slot 3 and update slot 3.
973 // Set all other buffer_idx to 0.
974 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
975 ref_frame_config->ref_idx[i] = 0;
976 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
977 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
978 ref_frame_config->refresh[3] = 1;
979 } else if (layer_id->spatial_layer_id == 1) {
980 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
981 // GOLDEN to slot 3. No update.
982 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
983 ref_frame_config->ref_idx[i] = 0;
984 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
985 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
986 }
987 }
988 break;
989 case 8:
990 // 3 spatial and 3 temporal layer.
991 // Same as case 9 but overalap in the buffer slot updates.
992 // (shift = 2). The slots 3 and 4 updated by first TL2 are
993 // reused for update in TL1 superframe.
994 // Note for this case, frame order hint must be disabled for
995 // lower resolutios (operating points > 0) to be decoedable.
996 case 9:
997 // 3 spatial and 3 temporal layer.
998 // No overlap in buffer updates between TL2 and TL1.
999 // TL2 updates slot 3 and 4, TL1 updates 5, 6, 7.
1000 // Set the references via the svc_ref_frame_config control.
1001 // Always reference LAST.
1002 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1003 if (superframe_cnt % 4 == 0) {
1004 // Base temporal layer.
1005 layer_id->temporal_layer_id = 0;
1006 if (layer_id->spatial_layer_id == 0) {
1007 // Reference LAST, update LAST.
1008 // Set all buffer_idx to 0.
1009 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1010 ref_frame_config->ref_idx[i] = 0;
1011 ref_frame_config->refresh[0] = 1;
1012 } else if (layer_id->spatial_layer_id == 1) {
1013 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1014 // GOLDEN (and all other refs) to slot 0.
1015 // Update slot 1 (LAST).
1016 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1017 ref_frame_config->ref_idx[i] = 0;
1018 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1019 ref_frame_config->refresh[1] = 1;
1020 } else if (layer_id->spatial_layer_id == 2) {
1021 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1022 // GOLDEN (and all other refs) to slot 1.
1023 // Update slot 2 (LAST).
1024 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1025 ref_frame_config->ref_idx[i] = 1;
1026 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1027 ref_frame_config->refresh[2] = 1;
1028 }
1029 } else if ((superframe_cnt - 1) % 4 == 0) {
1030 // First top temporal enhancement layer.
1031 layer_id->temporal_layer_id = 2;
1032 if (layer_id->spatial_layer_id == 0) {
1033 // Reference LAST (slot 0).
1034 // Set GOLDEN to slot 3 and update slot 3.
1035 // Set all other buffer_idx to slot 0.
1036 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1037 ref_frame_config->ref_idx[i] = 0;
1038 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1039 ref_frame_config->refresh[3] = 1;
1040 } else if (layer_id->spatial_layer_id == 1) {
1041 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1042 // GOLDEN (and all other refs) to slot 3.
1043 // Set LAST2 to slot 4 and Update slot 4.
1044 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1045 ref_frame_config->ref_idx[i] = 3;
1046 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1047 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1048 ref_frame_config->refresh[4] = 1;
1049 } else if (layer_id->spatial_layer_id == 2) {
1050 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1051 // GOLDEN (and all other refs) to slot 4.
1052 // No update.
1053 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1054 ref_frame_config->ref_idx[i] = 4;
1055 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1056 }
1057 } else if ((superframe_cnt - 2) % 4 == 0) {
1058 // Middle temporal enhancement layer.
1059 layer_id->temporal_layer_id = 1;
1060 if (layer_id->spatial_layer_id == 0) {
1061 // Reference LAST.
1062 // Set all buffer_idx to 0.
1063 // Set GOLDEN to slot 5 and update slot 5.
1064 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1065 ref_frame_config->ref_idx[i] = 0;
1066 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5 - shift;
1067 ref_frame_config->refresh[5 - shift] = 1;
1068 } else if (layer_id->spatial_layer_id == 1) {
1069 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
1070 // GOLDEN (and all other refs) to slot 5.
1071 // Set LAST3 to slot 6 and update slot 6.
1072 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1073 ref_frame_config->ref_idx[i] = 5 - shift;
1074 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1075 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 6 - shift;
1076 ref_frame_config->refresh[6 - shift] = 1;
1077 } else if (layer_id->spatial_layer_id == 2) {
1078 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
1079 // GOLDEN (and all other refs) to slot 6.
1080 // Set LAST3 to slot 7 and update slot 7.
1081 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1082 ref_frame_config->ref_idx[i] = 6 - shift;
1083 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1084 ref_frame_config->ref_idx[SVC_LAST3_FRAME] = 7 - shift;
1085 ref_frame_config->refresh[7 - shift] = 1;
1086 }
1087 } else if ((superframe_cnt - 3) % 4 == 0) {
1088 // Second top temporal enhancement layer.
1089 layer_id->temporal_layer_id = 2;
1090 if (layer_id->spatial_layer_id == 0) {
1091 // Set LAST to slot 5 and reference LAST.
1092 // Set GOLDEN to slot 3 and update slot 3.
1093 // Set all other buffer_idx to 0.
1094 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1095 ref_frame_config->ref_idx[i] = 0;
1096 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5 - shift;
1097 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1098 ref_frame_config->refresh[3] = 1;
1099 } else if (layer_id->spatial_layer_id == 1) {
1100 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
1101 // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
1102 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1103 ref_frame_config->ref_idx[i] = 0;
1104 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
1105 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1106 ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
1107 ref_frame_config->refresh[4] = 1;
1108 } else if (layer_id->spatial_layer_id == 2) {
1109 // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7,
1110 // GOLDEN to slot 4. No update.
1111 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1112 ref_frame_config->ref_idx[i] = 0;
1113 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 7 - shift;
1114 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 4;
1115 }
1116 }
1117 break;
1118 case 11:
1119 // Simulcast mode for 3 spatial and 3 temporal layers.
1120 // No inter-layer predicton, only prediction is temporal and single
1121 // reference (LAST).
1122 // No overlap in buffer slots between spatial layers. So for example,
1123 // SL0 only uses slots 0 and 1.
1124 // SL1 only uses slots 2 and 3.
1125 // SL2 only uses slots 4 and 5.
1126 // All 7 references for each inter-frame must only access buffer slots
1127 // for that spatial layer.
1128 // On key (super)frames: SL1 and SL2 must have no references set
1129 // and must refresh all the slots for that layer only (so 2 and 3
1130 // for SL1, 4 and 5 for SL2). The base SL0 will be labelled internally
1131 // as a Key frame (refresh all slots). SL1/SL2 will be labelled
1132 // internally as Intra-only frames that allow that stream to be decoded.
1133 // These conditions will allow for each spatial stream to be
1134 // independently decodeable.
1135
1136 // Initialize all references to 0 (don't use reference).
1137 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1138 ref_frame_config->reference[i] = 0;
1139 // Initialize as no refresh/update for all slots.
1140 for (i = 0; i < REF_FRAMES; i++) ref_frame_config->refresh[i] = 0;
1141 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1142 ref_frame_config->ref_idx[i] = 0;
1143
1144 if (is_key_frame) {
1145 if (layer_id->spatial_layer_id == 0) {
1146 // Assign LAST/GOLDEN to slot 0/1.
1147 // Refesh slots 0 and 1 for SL0.
1148 // SL0: this will get set to KEY frame internally.
1149 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1150 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 1;
1151 ref_frame_config->refresh[0] = 1;
1152 ref_frame_config->refresh[1] = 1;
1153 } else if (layer_id->spatial_layer_id == 1) {
1154 // Assign LAST/GOLDEN to slot 2/3.
1155 // Refesh slots 2 and 3 for SL1.
1156 // This will get set to Intra-only frame internally.
1157 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1158 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
1159 ref_frame_config->refresh[2] = 1;
1160 ref_frame_config->refresh[3] = 1;
1161 } else if (layer_id->spatial_layer_id == 2) {
1162 // Assign LAST/GOLDEN to slot 4/5.
1163 // Refresh slots 4 and 5 for SL2.
1164 // This will get set to Intra-only frame internally.
1165 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1166 ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 5;
1167 ref_frame_config->refresh[4] = 1;
1168 ref_frame_config->refresh[5] = 1;
1169 }
1170 } else if (superframe_cnt % 4 == 0) {
1171 // Base temporal layer: TL0
1172 layer_id->temporal_layer_id = 0;
1173 if (layer_id->spatial_layer_id == 0) { // SL0
1174 // Reference LAST. Assign all references to either slot
1175 // 0 or 1. Here we assign LAST to slot 0, all others to 1.
1176 // Update slot 0 (LAST).
1177 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1178 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1179 ref_frame_config->ref_idx[i] = 1;
1180 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1181 ref_frame_config->refresh[0] = 1;
1182 } else if (layer_id->spatial_layer_id == 1) { // SL1
1183 // Reference LAST. Assign all references to either slot
1184 // 2 or 3. Here we assign LAST to slot 2, all others to 3.
1185 // Update slot 2 (LAST).
1186 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1187 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1188 ref_frame_config->ref_idx[i] = 3;
1189 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1190 ref_frame_config->refresh[2] = 1;
1191 } else if (layer_id->spatial_layer_id == 2) { // SL2
1192 // Reference LAST. Assign all references to either slot
1193 // 4 or 5. Here we assign LAST to slot 4, all others to 5.
1194 // Update slot 4 (LAST).
1195 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1196 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1197 ref_frame_config->ref_idx[i] = 5;
1198 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1199 ref_frame_config->refresh[4] = 1;
1200 }
1201 } else if ((superframe_cnt - 1) % 4 == 0) {
1202 // First top temporal enhancement layer: TL2
1203 layer_id->temporal_layer_id = 2;
1204 if (layer_id->spatial_layer_id == 0) { // SL0
1205 // Reference LAST (slot 0). Assign other references to slot 1.
1206 // No update/refresh on any slots.
1207 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1208 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1209 ref_frame_config->ref_idx[i] = 1;
1210 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1211 } else if (layer_id->spatial_layer_id == 1) { // SL1
1212 // Reference LAST (slot 2). Assign other references to slot 3.
1213 // No update/refresh on any slots.
1214 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1215 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1216 ref_frame_config->ref_idx[i] = 3;
1217 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1218 } else if (layer_id->spatial_layer_id == 2) { // SL2
1219 // Reference LAST (slot 4). Assign other references to slot 4.
1220 // No update/refresh on any slots.
1221 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1222 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1223 ref_frame_config->ref_idx[i] = 5;
1224 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1225 }
1226 } else if ((superframe_cnt - 2) % 4 == 0) {
1227 // Middle temporal enhancement layer: TL1
1228 layer_id->temporal_layer_id = 1;
1229 if (layer_id->spatial_layer_id == 0) { // SL0
1230 // Reference LAST (slot 0).
1231 // Set GOLDEN to slot 1 and update slot 1.
1232 // This will be used as reference for next TL2.
1233 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1234 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1235 ref_frame_config->ref_idx[i] = 1;
1236 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 0;
1237 ref_frame_config->refresh[1] = 1;
1238 } else if (layer_id->spatial_layer_id == 1) { // SL1
1239 // Reference LAST (slot 2).
1240 // Set GOLDEN to slot 3 and update slot 3.
1241 // This will be used as reference for next TL2.
1242 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1243 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1244 ref_frame_config->ref_idx[i] = 3;
1245 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 2;
1246 ref_frame_config->refresh[3] = 1;
1247 } else if (layer_id->spatial_layer_id == 2) { // SL2
1248 // Reference LAST (slot 4).
1249 // Set GOLDEN to slot 5 and update slot 5.
1250 // This will be used as reference for next TL2.
1251 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1252 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1253 ref_frame_config->ref_idx[i] = 5;
1254 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 4;
1255 ref_frame_config->refresh[5] = 1;
1256 }
1257 } else if ((superframe_cnt - 3) % 4 == 0) {
1258 // Second top temporal enhancement layer: TL2
1259 layer_id->temporal_layer_id = 2;
1260 if (layer_id->spatial_layer_id == 0) { // SL0
1261 // Reference LAST (slot 1). Assign other references to slot 0.
1262 // No update/refresh on any slots.
1263 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1264 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1265 ref_frame_config->ref_idx[i] = 0;
1266 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
1267 } else if (layer_id->spatial_layer_id == 1) { // SL1
1268 // Reference LAST (slot 3). Assign other references to slot 2.
1269 // No update/refresh on any slots.
1270 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1271 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1272 ref_frame_config->ref_idx[i] = 2;
1273 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 3;
1274 } else if (layer_id->spatial_layer_id == 2) { // SL2
1275 // Reference LAST (slot 5). Assign other references to slot 4.
1276 // No update/refresh on any slots.
1277 ref_frame_config->reference[SVC_LAST_FRAME] = 1;
1278 for (i = 0; i < INTER_REFS_PER_FRAME; i++)
1279 ref_frame_config->ref_idx[i] = 4;
1280 ref_frame_config->ref_idx[SVC_LAST_FRAME] = 5;
1281 }
1282 }
1283 if (!simulcast_mode && layer_id->spatial_layer_id > 0) {
1284 // Always reference GOLDEN (inter-layer prediction).
1285 ref_frame_config->reference[SVC_GOLDEN_FRAME] = 1;
1286 if (ksvc_mode) {
1287 // KSVC: only keep the inter-layer reference (GOLDEN) for
1288 // superframes whose base is key.
1289 if (!is_key_frame) ref_frame_config->reference[SVC_GOLDEN_FRAME] = 0;
1290 }
1291 if (is_key_frame && layer_id->spatial_layer_id > 1) {
1292 // On superframes whose base is key: remove LAST to avoid prediction
1293 // off layer two levels below.
1294 ref_frame_config->reference[SVC_LAST_FRAME] = 0;
1295 }
1296 }
1297 // For 3 spatial layer case 8 (where there is free buffer slot):
1298 // allow for top spatial layer to use additional temporal reference.
1299 // Additional reference is only updated on base temporal layer, every
1300 // 10 TL0 frames here.
1301 if (!simulcast_mode && enable_longterm_temporal_ref &&
1302 layer_id->spatial_layer_id == 2 && layering_mode == 8) {
1303 ref_frame_config->ref_idx[SVC_ALTREF_FRAME] = REF_FRAMES - 1;
1304 if (!is_key_frame) ref_frame_config->reference[SVC_ALTREF_FRAME] = 1;
1305 if (base_count % 10 == 0 && layer_id->temporal_layer_id == 0)
1306 ref_frame_config->refresh[REF_FRAMES - 1] = 1;
1307 }
1308 break;
1309 default: assert(0); die("Error: Unsupported temporal layering mode!\n");
1310 }
1311}
1312
1313#if CONFIG_AV1_DECODER
1314// Returns whether there is a mismatch between the encoder's new frame and the
1315// decoder's new frame.
1316static int test_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder,
1317 const int frames_out) {
1318 aom_image_t enc_img, dec_img;
1319 int mismatch = 0;
1320
1321 /* Get the internal new frame */
1324
1325#if CONFIG_AV1_HIGHBITDEPTH
1326 if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
1327 (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
1328 if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1329 aom_image_t enc_hbd_img;
1331 &enc_hbd_img,
1332 static_cast<aom_img_fmt_t>(enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1333 enc_img.d_w, enc_img.d_h, 16);
1334 aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
1335 enc_img = enc_hbd_img;
1336 }
1337 if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1338 aom_image_t dec_hbd_img;
1340 &dec_hbd_img,
1341 static_cast<aom_img_fmt_t>(dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH),
1342 dec_img.d_w, dec_img.d_h, 16);
1343 aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
1344 dec_img = dec_hbd_img;
1345 }
1346 }
1347#endif
1348
1349 if (!aom_compare_img(&enc_img, &dec_img)) {
1350 int y[4], u[4], v[4];
1351#if CONFIG_AV1_HIGHBITDEPTH
1352 if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
1353 aom_find_mismatch_high(&enc_img, &dec_img, y, u, v);
1354 } else {
1355 aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1356 }
1357#else
1358 aom_find_mismatch(&enc_img, &dec_img, y, u, v);
1359#endif
1360 fprintf(stderr,
1361 "Encode/decode mismatch on frame %d at"
1362 " Y[%d, %d] {%d/%d},"
1363 " U[%d, %d] {%d/%d},"
1364 " V[%d, %d] {%d/%d}\n",
1365 frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0],
1366 v[1], v[2], v[3]);
1367 mismatch = 1;
1368 }
1369
1370 aom_img_free(&enc_img);
1371 aom_img_free(&dec_img);
1372 return mismatch;
1373}
1374#endif // CONFIG_AV1_DECODER
1375
// PSNR totals consumed by show_psnr() to print a summary line.
struct psnr_stats {
  // The second element of these arrays is reserved for high bitdepth.
  // Total squared error; passed to sse_to_psnr() for the "Overall" value.
  uint64_t psnr_sse_total[2];
  // Total sample count; passed to sse_to_psnr() for the "Overall" value.
  uint64_t psnr_samples_total[2];
  // Four PSNR sums per entry; show_psnr() divides each by psnr_count and
  // prints them under the Avg/Y/U/V labels.
  double psnr_totals[2][4];
  // Divisor for psnr_totals; show_psnr() prints nothing while it is zero.
  int psnr_count[2];
};
1383
1384static void show_psnr(struct psnr_stats *psnr_stream, double peak) {
1385 double ovpsnr;
1386
1387 if (!psnr_stream->psnr_count[0]) return;
1388
1389 fprintf(stderr, "\nPSNR (Overall/Avg/Y/U/V)");
1390 ovpsnr = sse_to_psnr((double)psnr_stream->psnr_samples_total[0], peak,
1391 (double)psnr_stream->psnr_sse_total[0]);
1392 fprintf(stderr, " %.3f", ovpsnr);
1393
1394 for (int i = 0; i < 4; i++) {
1395 fprintf(stderr, " %.3f",
1396 psnr_stream->psnr_totals[0][i] / psnr_stream->psnr_count[0]);
1397 }
1398 fprintf(stderr, "\n");
1399}
1400
1401static aom::AV1RateControlRtcConfig create_rtc_rc_config(
1402 const aom_codec_enc_cfg_t &cfg, const AppInput &app_input) {
1403 aom::AV1RateControlRtcConfig rc_cfg;
1404 rc_cfg.width = cfg.g_w;
1405 rc_cfg.height = cfg.g_h;
1406 rc_cfg.max_quantizer = cfg.rc_max_quantizer;
1407 rc_cfg.min_quantizer = cfg.rc_min_quantizer;
1408 rc_cfg.target_bandwidth = cfg.rc_target_bitrate;
1409 rc_cfg.buf_initial_sz = cfg.rc_buf_initial_sz;
1410 rc_cfg.buf_optimal_sz = cfg.rc_buf_optimal_sz;
1411 rc_cfg.buf_sz = cfg.rc_buf_sz;
1412 rc_cfg.overshoot_pct = cfg.rc_overshoot_pct;
1413 rc_cfg.undershoot_pct = cfg.rc_undershoot_pct;
1414 // This is hardcoded as AOME_SET_MAX_INTRA_BITRATE_PCT
1415 rc_cfg.max_intra_bitrate_pct = 300;
1416 rc_cfg.framerate = cfg.g_timebase.den;
1417 // TODO(jianj): Add suppor for SVC.
1418 rc_cfg.ss_number_layers = 1;
1419 rc_cfg.ts_number_layers = 1;
1420 rc_cfg.scaling_factor_num[0] = 1;
1421 rc_cfg.scaling_factor_den[0] = 1;
1422 rc_cfg.layer_target_bitrate[0] = static_cast<int>(rc_cfg.target_bandwidth);
1423 rc_cfg.max_quantizers[0] = rc_cfg.max_quantizer;
1424 rc_cfg.min_quantizers[0] = rc_cfg.min_quantizer;
1425 rc_cfg.aq_mode = app_input.aq_mode;
1426
1427 return rc_cfg;
1428}
1429
// Maps a 0-255 range qindex (used internally) back to the 0-63 range
// quantizer passed in from outside.
// Returns the smallest quantizer whose table entry is >= `qindex`, or 63 when
// no entry is.
static int qindex_to_quantizer(int qindex) {
  // Entry q is the qindex that corresponds to quantizer q.
  static const int kQindexForQuantizer[64] = {
    0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,
    52,  56,  60,  64,  68,  72,  76,  80,  84,  88,  92,  96,  100,
    104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152,
    156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204,
    208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255,
  };
  const int kMaxQuantizer = 63;

  // Walk up the table until an entry reaches qindex; stopping at the last
  // slot gives 63 whether or not that last entry reaches qindex.
  int quantizer = 0;
  while (quantizer < kMaxQuantizer && kQindexForQuantizer[quantizer] < qindex) {
    ++quantizer;
  }
  return quantizer;
}
1445
1446static void set_active_map(const aom_codec_enc_cfg_t *cfg,
1447 aom_codec_ctx_t *codec, int frame_cnt) {
1448 aom_active_map_t map = { 0, 0, 0 };
1449
1450 map.rows = (cfg->g_h + 15) / 16;
1451 map.cols = (cfg->g_w + 15) / 16;
1452
1453 map.active_map = (uint8_t *)malloc(map.rows * map.cols);
1454 if (!map.active_map) die("Failed to allocate active map");
1455
1456 // Example map for testing.
1457 for (unsigned int i = 0; i < map.rows; ++i) {
1458 for (unsigned int j = 0; j < map.cols; ++j) {
1459 int index = map.cols * i + j;
1460 map.active_map[index] = 1;
1461 if (frame_cnt < 300) {
1462 if (i < map.rows / 2 && j < map.cols / 2) map.active_map[index] = 0;
1463 } else if (frame_cnt >= 300) {
1464 if (i < map.rows / 2 && j >= map.cols / 2) map.active_map[index] = 0;
1465 }
1466 }
1467 }
1468
1469 if (aom_codec_control(codec, AOME_SET_ACTIVEMAP, &map))
1470 die_codec(codec, "Failed to set active map");
1471
1472 free(map.active_map);
1473}
1474
1475int main(int argc, const char **argv) {
1476 AppInput app_input;
1477 AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL };
1478 FILE *obu_files[AOM_MAX_LAYERS] = { NULL };
1479 AvxVideoWriter *total_layer_file = NULL;
1480 FILE *total_layer_obu_file = NULL;
1482 int frame_cnt = 0;
1483 aom_image_t raw;
1484 int frame_avail;
1485 int got_data = 0;
1486 int flags = 0;
1487 int i;
1488 int pts = 0; // PTS starts at 0.
1489 int frame_duration = 1; // 1 timebase tick per frame.
1490 aom_svc_layer_id_t layer_id;
1491 aom_svc_params_t svc_params;
1492 aom_svc_ref_frame_config_t ref_frame_config;
1493 aom_svc_ref_frame_comp_pred_t ref_frame_comp_pred;
1494
1495#if CONFIG_INTERNAL_STATS
1496 FILE *stats_file = fopen("opsnr.stt", "a");
1497 if (stats_file == NULL) {
1498 die("Cannot open opsnr.stt\n");
1499 }
1500#endif
1501#if CONFIG_AV1_DECODER
1502 aom_codec_ctx_t decoder;
1503#endif
1504
1505 struct RateControlMetrics rc;
1506 int64_t cx_time = 0;
1507 int64_t cx_time_layer[AOM_MAX_LAYERS]; // max number of layers.
1508 int frame_cnt_layer[AOM_MAX_LAYERS];
1509 double sum_bitrate = 0.0;
1510 double sum_bitrate2 = 0.0;
1511 double framerate = 30.0;
1512 int use_svc_control = 1;
1513 int set_err_resil_frame = 0;
1514 int test_changing_bitrate = 0;
1515 zero(rc.layer_target_bitrate);
1516 memset(&layer_id, 0, sizeof(aom_svc_layer_id_t));
1517 memset(&app_input, 0, sizeof(AppInput));
1518 memset(&svc_params, 0, sizeof(svc_params));
1519
1520 // Flag to test dynamic scaling of source frames for single
1521 // spatial stream, using the scaling_mode control.
1522 const int test_dynamic_scaling_single_layer = 0;
1523
1524 // Flag to test setting speed per layer.
1525 const int test_speed_per_layer = 0;
1526
1527 // Flag for testing active maps.
1528 const int test_active_maps = 0;
1529
1530 /* Setup default input stream settings */
1531 app_input.input_ctx.framerate.numerator = 30;
1532 app_input.input_ctx.framerate.denominator = 1;
1533 app_input.input_ctx.only_i420 = 0;
1534 app_input.input_ctx.bit_depth = AOM_BITS_8;
1535 app_input.speed = 7;
1536 exec_name = argv[0];
1537
1538 // start with default encoder configuration
1541 if (res != AOM_CODEC_OK) {
1542 die("Failed to get config: %s\n", aom_codec_err_to_string(res));
1543 }
1544
1545 // Real time parameters.
1547
1548 cfg.rc_end_usage = AOM_CBR;
1549 cfg.rc_min_quantizer = 2;
1550 cfg.rc_max_quantizer = 52;
1551 cfg.rc_undershoot_pct = 50;
1552 cfg.rc_overshoot_pct = 50;
1553 cfg.rc_buf_initial_sz = 600;
1554 cfg.rc_buf_optimal_sz = 600;
1555 cfg.rc_buf_sz = 1000;
1556 cfg.rc_resize_mode = 0; // Set to RESIZE_DYNAMIC for dynamic resize.
1557 cfg.g_lag_in_frames = 0;
1558 cfg.kf_mode = AOM_KF_AUTO;
1559
1560 parse_command_line(argc, argv, &app_input, &svc_params, &cfg);
1561
1562 int ts_number_layers = svc_params.number_temporal_layers;
1563 int ss_number_layers = svc_params.number_spatial_layers;
1564
1565 unsigned int width = cfg.g_w;
1566 unsigned int height = cfg.g_h;
1567
1568 if (app_input.layering_mode >= 0) {
1569 if (ts_number_layers !=
1570 mode_to_num_temporal_layers[app_input.layering_mode] ||
1571 ss_number_layers !=
1572 mode_to_num_spatial_layers[app_input.layering_mode]) {
1573 die("Number of layers doesn't match layering mode.");
1574 }
1575 }
1576
1577 // Y4M reader has its own allocation.
1578 if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
1579 if (!aom_img_alloc(&raw, AOM_IMG_FMT_I420, width, height, 32)) {
1580 die("Failed to allocate image (%dx%d)", width, height);
1581 }
1582 }
1583
1585
1586 memcpy(&rc.layer_target_bitrate[0], &svc_params.layer_target_bitrate[0],
1587 sizeof(svc_params.layer_target_bitrate));
1588
1589 unsigned int total_rate = 0;
1590 for (i = 0; i < ss_number_layers; i++) {
1591 total_rate +=
1592 svc_params
1593 .layer_target_bitrate[i * ts_number_layers + ts_number_layers - 1];
1594 }
1595 if (total_rate != cfg.rc_target_bitrate) {
1596 die("Incorrect total target bitrate");
1597 }
1598
1599 svc_params.framerate_factor[0] = 1;
1600 if (ts_number_layers == 2) {
1601 svc_params.framerate_factor[0] = 2;
1602 svc_params.framerate_factor[1] = 1;
1603 } else if (ts_number_layers == 3) {
1604 svc_params.framerate_factor[0] = 4;
1605 svc_params.framerate_factor[1] = 2;
1606 svc_params.framerate_factor[2] = 1;
1607 }
1608
1609 if (app_input.input_ctx.file_type == FILE_TYPE_Y4M) {
1610 // Override these settings with the info from Y4M file.
1611 cfg.g_w = app_input.input_ctx.width;
1612 cfg.g_h = app_input.input_ctx.height;
1613 // g_timebase is the reciprocal of frame rate.
1614 cfg.g_timebase.num = app_input.input_ctx.framerate.denominator;
1615 cfg.g_timebase.den = app_input.input_ctx.framerate.numerator;
1616 }
1617 framerate = cfg.g_timebase.den / cfg.g_timebase.num;
1618 set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
1619
1620 AvxVideoInfo info;
1621 info.codec_fourcc = get_fourcc_by_aom_encoder(encoder);
1622 info.frame_width = cfg.g_w;
1623 info.frame_height = cfg.g_h;
1624 info.time_base.numerator = cfg.g_timebase.num;
1625 info.time_base.denominator = cfg.g_timebase.den;
1626 // Open an output file for each stream.
1627 for (int sl = 0; sl < ss_number_layers; ++sl) {
1628 for (int tl = 0; tl < ts_number_layers; ++tl) {
1629 i = sl * ts_number_layers + tl;
1630 char file_name[PATH_MAX];
1631 snprintf(file_name, sizeof(file_name), "%s_%d.av1",
1632 app_input.output_filename, i);
1633 if (app_input.output_obu) {
1634 obu_files[i] = fopen(file_name, "wb");
1635 if (!obu_files[i]) die("Failed to open %s for writing", file_name);
1636 } else {
1637 outfile[i] = aom_video_writer_open(file_name, kContainerIVF, &info);
1638 if (!outfile[i]) die("Failed to open %s for writing", file_name);
1639 }
1640 }
1641 }
1642 if (app_input.output_obu) {
1643 total_layer_obu_file = fopen(app_input.output_filename, "wb");
1644 if (!total_layer_obu_file)
1645 die("Failed to open %s for writing", app_input.output_filename);
1646 } else {
1647 total_layer_file =
1648 aom_video_writer_open(app_input.output_filename, kContainerIVF, &info);
1649 if (!total_layer_file)
1650 die("Failed to open %s for writing", app_input.output_filename);
1651 }
1652
1653 // Initialize codec.
1654 aom_codec_ctx_t codec;
1655 aom_codec_flags_t flag = 0;
1657 flag |= app_input.show_psnr ? AOM_CODEC_USE_PSNR : 0;
1658 if (aom_codec_enc_init(&codec, encoder, &cfg, flag))
1659 die_codec(&codec, "Failed to initialize encoder");
1660
1661#if CONFIG_AV1_DECODER
1662 if (app_input.decode) {
1663 if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0))
1664 die_codec(&decoder, "Failed to initialize decoder");
1665 }
1666#endif
1667
1668 aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed);
1669 aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 3 : 0);
1684
1685 // Settings to reduce key frame encoding time.
1691
1693
1694 aom_codec_control(&codec, AV1E_SET_TUNE_CONTENT, app_input.tune_content);
1695 if (app_input.tune_content == AOM_CONTENT_SCREEN) {
1697 // INTRABC is currently disabled for rt mode, as it's too slow.
1699 }
1700
1701 if (app_input.use_external_rc) {
1703 }
1704
1706
1709
1711
1712 svc_params.number_spatial_layers = ss_number_layers;
1713 svc_params.number_temporal_layers = ts_number_layers;
1714 for (i = 0; i < ss_number_layers * ts_number_layers; ++i) {
1715 svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
1716 svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
1717 }
1718 for (i = 0; i < ss_number_layers; ++i) {
1719 svc_params.scaling_factor_num[i] = 1;
1720 svc_params.scaling_factor_den[i] = 1;
1721 }
1722 if (ss_number_layers == 2) {
1723 svc_params.scaling_factor_num[0] = 1;
1724 svc_params.scaling_factor_den[0] = 2;
1725 } else if (ss_number_layers == 3) {
1726 svc_params.scaling_factor_num[0] = 1;
1727 svc_params.scaling_factor_den[0] = 4;
1728 svc_params.scaling_factor_num[1] = 1;
1729 svc_params.scaling_factor_den[1] = 2;
1730 }
1731 aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params);
1732 // TODO(aomedia:3032): Configure KSVC in fixed mode.
1733
1734 // This controls the maximum target size of the key frame.
1735 // For generating smaller key frames, use a smaller max_intra_size_pct
1736 // value, like 100 or 200.
1737 {
1738 const int max_intra_size_pct = 300;
1740 max_intra_size_pct);
1741 }
1742
1743 for (int lx = 0; lx < ts_number_layers * ss_number_layers; lx++) {
1744 cx_time_layer[lx] = 0;
1745 frame_cnt_layer[lx] = 0;
1746 }
1747
1748 std::unique_ptr<aom::AV1RateControlRTC> rc_api;
1749 if (app_input.use_external_rc) {
1750 const aom::AV1RateControlRtcConfig rc_cfg =
1751 create_rtc_rc_config(cfg, app_input);
1752 rc_api = aom::AV1RateControlRTC::Create(rc_cfg);
1753 }
1754
1755 frame_avail = 1;
1756 struct psnr_stats psnr_stream;
1757 memset(&psnr_stream, 0, sizeof(psnr_stream));
1758 while (frame_avail || got_data) {
1759 struct aom_usec_timer timer;
1760 frame_avail = read_frame(&(app_input.input_ctx), &raw);
1761 // Loop over spatial layers.
1762 for (int slx = 0; slx < ss_number_layers; slx++) {
1763 aom_codec_iter_t iter = NULL;
1764 const aom_codec_cx_pkt_t *pkt;
1765 int layer = 0;
1766 // Flag for superframe whose base is key.
1767 int is_key_frame = (frame_cnt % cfg.kf_max_dist) == 0;
1768 // For flexible mode:
1769 if (app_input.layering_mode >= 0) {
1770 // Set the reference/update flags, layer_id, and reference_map
1771 // buffer index.
1772 set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id,
1773 &ref_frame_config, &ref_frame_comp_pred,
1774 &use_svc_control, slx, is_key_frame,
1775 (app_input.layering_mode == 10), app_input.speed);
1776 aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
1777 if (use_svc_control) {
1779 &ref_frame_config);
1781 &ref_frame_comp_pred);
1782 }
1783 // Set the speed per layer.
1784 if (test_speed_per_layer) {
1785 int speed_per_layer = 10;
1786 if (layer_id.spatial_layer_id == 0) {
1787 if (layer_id.temporal_layer_id == 0) speed_per_layer = 6;
1788 if (layer_id.temporal_layer_id == 1) speed_per_layer = 7;
1789 if (layer_id.temporal_layer_id == 2) speed_per_layer = 8;
1790 } else if (layer_id.spatial_layer_id == 1) {
1791 if (layer_id.temporal_layer_id == 0) speed_per_layer = 7;
1792 if (layer_id.temporal_layer_id == 1) speed_per_layer = 8;
1793 if (layer_id.temporal_layer_id == 2) speed_per_layer = 9;
1794 } else if (layer_id.spatial_layer_id == 2) {
1795 if (layer_id.temporal_layer_id == 0) speed_per_layer = 8;
1796 if (layer_id.temporal_layer_id == 1) speed_per_layer = 9;
1797 if (layer_id.temporal_layer_id == 2) speed_per_layer = 10;
1798 }
1799 aom_codec_control(&codec, AOME_SET_CPUUSED, speed_per_layer);
1800 }
1801 } else {
1802 // Only up to 3 temporal layers supported in fixed mode.
1803 // Only need to set spatial and temporal layer_id: reference
1804 // prediction, refresh, and buffer_idx are set internally.
1805 layer_id.spatial_layer_id = slx;
1806 layer_id.temporal_layer_id = 0;
1807 if (ts_number_layers == 2) {
1808 layer_id.temporal_layer_id = (frame_cnt % 2) != 0;
1809 } else if (ts_number_layers == 3) {
1810 if (frame_cnt % 2 != 0)
1811 layer_id.temporal_layer_id = 2;
1812 else if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0))
1813 layer_id.temporal_layer_id = 1;
1814 }
1815 aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
1816 }
1817
1818 if (set_err_resil_frame && cfg.g_error_resilient == 0) {
1819 // Set error_resilient per frame: off/0 for base layer and
1820 // on/1 for enhancement layer frames.
1821 // Note that this can only be done on the fly/per-frame/layer
1822 // if the config error_resilience is off/0. See the logic for updating
1823 // in set_encoder_config():
1824 // tool_cfg->error_resilient_mode =
1825 // cfg->g_error_resilient | extra_cfg->error_resilient_mode;
1826 const int err_resil_mode =
1827 layer_id.spatial_layer_id > 0 || layer_id.temporal_layer_id > 0;
1829 err_resil_mode);
1830 }
1831
1832 layer = slx * ts_number_layers + layer_id.temporal_layer_id;
1833 if (frame_avail && slx == 0) ++rc.layer_input_frames[layer];
1834
1835 if (test_dynamic_scaling_single_layer) {
1836 // Example to scale source down by 2x2, then 4x4, and then back up to
1837 // 2x2, and then back to original.
1838 int frame_2x2 = 200;
1839 int frame_4x4 = 400;
1840 int frame_2x2up = 600;
1841 int frame_orig = 800;
1842 if (frame_cnt >= frame_2x2 && frame_cnt < frame_4x4) {
1843 // Scale source down by 2x2.
1844 struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
1845 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1846 } else if (frame_cnt >= frame_4x4 && frame_cnt < frame_2x2up) {
1847 // Scale source down by 4x4.
1848 struct aom_scaling_mode mode = { AOME_ONEFOUR, AOME_ONEFOUR };
1849 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1850 } else if (frame_cnt >= frame_2x2up && frame_cnt < frame_orig) {
1851 // Source back up to 2x2.
1852 struct aom_scaling_mode mode = { AOME_ONETWO, AOME_ONETWO };
1853 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1854 } else if (frame_cnt >= frame_orig) {
1855 // Source back up to original resolution (no scaling).
1856 struct aom_scaling_mode mode = { AOME_NORMAL, AOME_NORMAL };
1857 aom_codec_control(&codec, AOME_SET_SCALEMODE, &mode);
1858 }
1859 if (frame_cnt == frame_2x2 || frame_cnt == frame_4x4 ||
1860 frame_cnt == frame_2x2up || frame_cnt == frame_orig) {
1861 // For dynamic resize testing on single layer: refresh all references
1862 // on the resized frame: this is to avoid decode error:
1863 // if resize goes down by >= 4x4 then libaom decoder will throw an
1864 // error that some reference (even though not used) is beyond the
1865 // limit size (must be smaller than 4x4).
1866 for (i = 0; i < REF_FRAMES; i++) ref_frame_config.refresh[i] = 1;
1867 if (use_svc_control) {
1869 &ref_frame_config);
1871 &ref_frame_comp_pred);
1872 }
1873 }
1874 }
1875
1876 // Change target_bitrate every other frame.
1877 if (test_changing_bitrate && frame_cnt % 2 == 0) {
1878 if (frame_cnt < 500)
1879 cfg.rc_target_bitrate += 10;
1880 else
1881 cfg.rc_target_bitrate -= 10;
1882 // Do big increase and decrease.
1883 if (frame_cnt == 100) cfg.rc_target_bitrate <<= 1;
1884 if (frame_cnt == 600) cfg.rc_target_bitrate >>= 1;
1885 if (cfg.rc_target_bitrate < 100) cfg.rc_target_bitrate = 100;
1886 // Call change_config, or bypass with new control.
1887 // res = aom_codec_enc_config_set(&codec, &cfg);
1889 cfg.rc_target_bitrate))
1890 die_codec(&codec, "Failed to SET_BITRATE_ONE_PASS_CBR");
1891 }
1892
1893 if (rc_api) {
1894 aom::AV1FrameParamsRTC frame_params;
1895 // TODO(jianj): Add support for SVC.
1896 frame_params.spatial_layer_id = 0;
1897 frame_params.temporal_layer_id = 0;
1898 frame_params.frame_type =
1899 is_key_frame ? aom::kKeyFrame : aom::kInterFrame;
1900 rc_api->ComputeQP(frame_params);
1901 const int current_qp = rc_api->GetQP();
1903 qindex_to_quantizer(current_qp))) {
1904 die_codec(&codec, "Failed to SET_QUANTIZER_ONE_PASS");
1905 }
1906 }
1907
1908 if (test_active_maps) set_active_map(&cfg, &codec, frame_cnt);
1909
1910 // Do the layer encode.
1911 aom_usec_timer_start(&timer);
1912 if (aom_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags))
1913 die_codec(&codec, "Failed to encode frame");
1914 aom_usec_timer_mark(&timer);
1915 cx_time += aom_usec_timer_elapsed(&timer);
1916 cx_time_layer[layer] += aom_usec_timer_elapsed(&timer);
1917 frame_cnt_layer[layer] += 1;
1918
1919 // Get the high motion content flag.
1920 int content_flag = 0;
1922 &content_flag)) {
1923 die_codec(&codec, "Failed to GET_HIGH_MOTION_CONTENT_SCREEN_RTC");
1924 }
1925
1926 got_data = 0;
1927 // For simulcast (mode 11): write out each spatial layer to the file.
1928 int ss_layers_write = (app_input.layering_mode == 11)
1929 ? layer_id.spatial_layer_id + 1
1930 : ss_number_layers;
1931 while ((pkt = aom_codec_get_cx_data(&codec, &iter))) {
1932 switch (pkt->kind) {
1934 for (int sl = layer_id.spatial_layer_id; sl < ss_layers_write;
1935 ++sl) {
1936 for (int tl = layer_id.temporal_layer_id; tl < ts_number_layers;
1937 ++tl) {
1938 int j = sl * ts_number_layers + tl;
1939 if (app_input.output_obu) {
1940 fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
1941 obu_files[j]);
1942 } else {
1943 aom_video_writer_write_frame(
1944 outfile[j],
1945 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
1946 pkt->data.frame.sz, pts);
1947 }
1948 if (sl == layer_id.spatial_layer_id)
1949 rc.layer_encoding_bitrate[j] += 8.0 * pkt->data.frame.sz;
1950 }
1951 }
1952 got_data = 1;
1953 // Write everything into the top layer.
1954 if (app_input.output_obu) {
1955 fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
1956 total_layer_obu_file);
1957 } else {
1958 aom_video_writer_write_frame(
1959 total_layer_file,
1960 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
1961 pkt->data.frame.sz, pts);
1962 }
1963 // Keep count of rate control stats per layer (for non-key).
1964 if (!(pkt->data.frame.flags & AOM_FRAME_IS_KEY)) {
1965 int j = layer_id.spatial_layer_id * ts_number_layers +
1966 layer_id.temporal_layer_id;
1967 assert(j >= 0);
1968 rc.layer_avg_frame_size[j] += 8.0 * pkt->data.frame.sz;
1969 rc.layer_avg_rate_mismatch[j] +=
1970 fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[j]) /
1971 rc.layer_pfb[j];
1972 if (slx == 0) ++rc.layer_enc_frames[layer_id.temporal_layer_id];
1973 }
1974
1975 if (rc_api) {
1976 rc_api->PostEncodeUpdate(pkt->data.frame.sz);
1977 }
1978 // Update for short-time encoding bitrate states, for moving window
1979 // of size rc->window, shifted by rc->window / 2.
1980 // Ignore first window segment, due to key frame.
1981 // For spatial layers: only do this for top/highest SL.
1982 if (frame_cnt > rc.window_size && slx == ss_number_layers - 1) {
1983 sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
1984 rc.window_size = (rc.window_size <= 0) ? 1 : rc.window_size;
1985 if (frame_cnt % rc.window_size == 0) {
1986 rc.window_count += 1;
1987 rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
1988 rc.variance_st_encoding_bitrate +=
1989 (sum_bitrate / rc.window_size) *
1990 (sum_bitrate / rc.window_size);
1991 sum_bitrate = 0.0;
1992 }
1993 }
1994 // Second shifted window.
1995 if (frame_cnt > rc.window_size + rc.window_size / 2 &&
1996 slx == ss_number_layers - 1) {
1997 sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
1998 if (frame_cnt > 2 * rc.window_size &&
1999 frame_cnt % rc.window_size == 0) {
2000 rc.window_count += 1;
2001 rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
2002 rc.variance_st_encoding_bitrate +=
2003 (sum_bitrate2 / rc.window_size) *
2004 (sum_bitrate2 / rc.window_size);
2005 sum_bitrate2 = 0.0;
2006 }
2007 }
2008
2009#if CONFIG_AV1_DECODER
2010 if (app_input.decode) {
2011 if (aom_codec_decode(
2012 &decoder,
2013 reinterpret_cast<const uint8_t *>(pkt->data.frame.buf),
2014 pkt->data.frame.sz, NULL))
2015 die_codec(&decoder, "Failed to decode frame");
2016 }
2017#endif
2018
2019 break;
2020 case AOM_CODEC_PSNR_PKT:
2021 if (app_input.show_psnr) {
2022 psnr_stream.psnr_sse_total[0] += pkt->data.psnr.sse[0];
2023 psnr_stream.psnr_samples_total[0] += pkt->data.psnr.samples[0];
2024 for (int plane = 0; plane < 4; plane++) {
2025 psnr_stream.psnr_totals[0][plane] += pkt->data.psnr.psnr[plane];
2026 }
2027 psnr_stream.psnr_count[0]++;
2028 }
2029 break;
2030 default: break;
2031 }
2032 }
2033#if CONFIG_AV1_DECODER
2034 if (got_data && app_input.decode) {
2035 // Don't look for mismatch on top spatial and top temporal layers as
2036 // they are non reference frames.
2037 if ((ss_number_layers > 1 || ts_number_layers > 1) &&
2038 !(layer_id.temporal_layer_id > 0 &&
2039 layer_id.temporal_layer_id == ts_number_layers - 1)) {
2040 if (test_decode(&codec, &decoder, frame_cnt)) {
2041#if CONFIG_INTERNAL_STATS
2042 fprintf(stats_file, "First mismatch occurred in frame %d\n",
2043 frame_cnt);
2044 fclose(stats_file);
2045#endif
2046 fatal("Mismatch seen");
2047 }
2048 }
2049 }
2050#endif
2051 } // loop over spatial layers
2052 ++frame_cnt;
2053 pts += frame_duration;
2054 }
2055
2056 close_input_file(&(app_input.input_ctx));
2057 printout_rate_control_summary(&rc, frame_cnt, ss_number_layers,
2058 ts_number_layers);
2059
2060 printf("\n");
2061 for (int slx = 0; slx < ss_number_layers; slx++)
2062 for (int tlx = 0; tlx < ts_number_layers; tlx++) {
2063 int lx = slx * ts_number_layers + tlx;
2064 printf("Per layer encoding time/FPS stats for encoder: %d %d %d %f %f \n",
2065 slx, tlx, frame_cnt_layer[lx],
2066 (float)cx_time_layer[lx] / (double)(frame_cnt_layer[lx] * 1000),
2067 1000000 * (double)frame_cnt_layer[lx] / (double)cx_time_layer[lx]);
2068 }
2069
2070 printf("\n");
2071 printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f\n",
2072 frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000),
2073 1000000 * (double)frame_cnt / (double)cx_time);
2074
2075 if (app_input.show_psnr) {
2076 show_psnr(&psnr_stream, 255.0);
2077 }
2078
2079 if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy encoder");
2080
2081#if CONFIG_AV1_DECODER
2082 if (app_input.decode) {
2083 if (aom_codec_destroy(&decoder))
2084 die_codec(&decoder, "Failed to destroy decoder");
2085 }
2086#endif
2087
2088#if CONFIG_INTERNAL_STATS
2089 fprintf(stats_file, "No mismatch detected in recon buffers\n");
2090 fclose(stats_file);
2091#endif
2092
2093 // Try to rewrite the output file headers with the actual frame count.
2094 for (i = 0; i < ss_number_layers * ts_number_layers; ++i)
2095 aom_video_writer_close(outfile[i]);
2096 aom_video_writer_close(total_layer_file);
2097
2098 if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) {
2099 aom_img_free(&raw);
2100 }
2101 return EXIT_SUCCESS;
2102}
Describes the decoder algorithm interface to applications.
Describes the encoder algorithm interface to applications.
@ AOM_CSP_UNKNOWN
Definition aom_image.h:143
enum aom_chroma_sample_position aom_chroma_sample_position_t
List of chroma sample positions.
#define AOM_IMG_FMT_HIGHBITDEPTH
Definition aom_image.h:38
aom_image_t * aom_img_alloc(aom_image_t *img, aom_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int align)
Open a descriptor, allocating storage for the underlying image.
@ AOM_IMG_FMT_I420
Definition aom_image.h:45
enum aom_img_fmt aom_img_fmt_t
List of supported image formats.
void aom_img_free(aom_image_t *img)
Close an image descriptor.
Provides definitions for using AOM or AV1 encoder algorithm within the aom Codec Interface.
#define AOM_MAX_LAYERS
Definition aomcx.h:1692
#define AOM_MAX_TS_LAYERS
Definition aomcx.h:1694
aom_codec_iface_t * aom_codec_av1_cx(void)
The interface to the AV1 encoder.
@ AOM_FULL_SUPERFRAME_DROP
Definition aomcx.h:1754
@ AV1E_SET_BITRATE_ONE_PASS_CBR
Codec control to set the target bitrate in kilobits per second, unsigned int parameter....
Definition aomcx.h:1527
@ AV1E_SET_ENABLE_SMOOTH_INTRA
Codec control function to turn on / off smooth intra modes usage, int parameter.
Definition aomcx.h:1070
@ AV1E_SET_ENABLE_TPL_MODEL
Codec control function to enable RDO modulated by frame temporal dependency, unsigned int parameter.
Definition aomcx.h:408
@ AV1E_SET_AQ_MODE
Codec control function to set adaptive quantization mode, unsigned int parameter.
Definition aomcx.h:468
@ AV1E_SET_SVC_LAYER_ID
Codec control function to set the layer id, aom_svc_layer_id_t* parameter.
Definition aomcx.h:1276
@ AV1E_SET_SVC_REF_FRAME_CONFIG
Codec control function to set the reference frame config, aom_svc_ref_frame_config_t* parameter.
Definition aomcx.h:1286
@ AV1E_SET_TUNE_CONTENT
Codec control function to set content type, aom_tune_content parameter.
Definition aomcx.h:497
@ AV1E_SET_CDF_UPDATE_MODE
Codec control function to set CDF update mode, unsigned int parameter.
Definition aomcx.h:506
@ AV1E_SET_ENABLE_ANGLE_DELTA
Codec control function to turn on/off intra angle delta, int parameter.
Definition aomcx.h:1117
@ AV1E_SET_MV_COST_UPD_FREQ
Control to set frequency of the cost updates for motion vectors, unsigned int parameter.
Definition aomcx.h:1254
@ AV1E_SET_INTRA_DEFAULT_TX_ONLY
Control to use default tx type only for intra modes, int parameter.
Definition aomcx.h:1203
@ AV1E_SET_SVC_REF_FRAME_COMP_PRED
Codec control function to set reference frame compound prediction. aom_svc_ref_frame_comp_pred_t* par...
Definition aomcx.h:1391
@ AV1E_SET_ENABLE_INTRABC
Codec control function to turn on/off intra block copy mode, int parameter.
Definition aomcx.h:1113
@ AV1E_SET_ENABLE_WARPED_MOTION
Codec control function to turn on / off warped motion usage at sequence level, int parameter.
Definition aomcx.h:1038
@ AV1E_SET_RTC_EXTERNAL_RC
Codec control function to set flag for rate control used by external encoders.
Definition aomcx.h:1426
@ AV1E_SET_COEFF_COST_UPD_FREQ
Control to set frequency of the cost updates for coefficients, unsigned int parameter.
Definition aomcx.h:1234
@ AV1E_SET_ENABLE_CDEF
Codec control function to encode with CDEF, unsigned int parameter.
Definition aomcx.h:670
@ AOME_SET_ACTIVEMAP
Codec control function to pass an Active map to encoder, aom_active_map_t* parameter.
Definition aomcx.h:190
@ AV1E_SET_DV_COST_UPD_FREQ
Control to set frequency of the cost updates for intrabc motion vectors, unsigned int parameter.
Definition aomcx.h:1357
@ AV1E_SET_SVC_FRAME_DROP_MODE
Codec control to set the frame drop mode for SVC, unsigned int parameter. The valid values are consta...
Definition aomcx.h:1540
@ AV1E_SET_SVC_PARAMS
Codec control function to set SVC parameters, aom_svc_params_t* parameter.
Definition aomcx.h:1281
@ AV1E_SET_ENABLE_FILTER_INTRA
Codec control function to turn on / off filter intra usage at sequence level, int parameter.
Definition aomcx.h:1059
@ AV1E_SET_ENABLE_PALETTE
Codec control function to turn on/off palette mode, int parameter.
Definition aomcx.h:1109
@ AV1E_SET_ENABLE_CFL_INTRA
Codec control function to turn on / off CFL uv intra mode usage, int parameter.
Definition aomcx.h:1088
@ AOME_SET_MAX_INTRA_BITRATE_PCT
Codec control function to set max data rate for intra frames, unsigned int parameter.
Definition aomcx.h:306
@ AV1E_SET_ERROR_RESILIENT_MODE
Codec control function to enable error_resilient_mode, int parameter.
Definition aomcx.h:442
@ AV1E_SET_ENABLE_OBMC
Codec control function to predict with OBMC mode, unsigned int parameter.
Definition aomcx.h:697
@ AV1E_SET_AUTO_TILES
Codec control to set auto tiling, unsigned int parameter. Value of 1 means encoder will set number of...
Definition aomcx.h:1548
@ AV1E_SET_LOOPFILTER_CONTROL
Codec control to control loop filter.
Definition aomcx.h:1406
@ AOME_SET_SCALEMODE
Codec control function to set encoder scaling mode for the next frame to be coded,...
Definition aomcx.h:197
@ AV1E_SET_ENABLE_ORDER_HINT
Codec control function to turn on / off frame order hint (int parameter). Affects: joint compound mod...
Definition aomcx.h:865
@ AV1E_SET_DELTAQ_MODE
Codec control function to set the delta q mode, unsigned int parameter.
Definition aomcx.h:1131
@ AV1E_SET_POSTENCODE_DROP_RTC
Codec control to enable post encode frame drop for RTC encoding, int parameter.
Definition aomcx.h:1564
@ AV1E_SET_ENABLE_GLOBAL_MOTION
Codec control function to turn on / off global motion usage for a sequence, int parameter.
Definition aomcx.h:1028
@ AOME_SET_CPUUSED
Codec control function to set encoder internal speed settings, int parameter.
Definition aomcx.h:220
@ AV1E_GET_HIGH_MOTION_CONTENT_SCREEN_RTC
Codec control to get the high motion content flag, used for screen content realtime (RTC) encoding,...
Definition aomcx.h:1555
@ AV1E_SET_GF_CBR_BOOST_PCT
Boost percentage for Golden Frame in CBR mode, unsigned int parameter.
Definition aomcx.h:339
@ AV1E_SET_QUANTIZER_ONE_PASS
Codec control to set quantizer for the next frame, int parameter.
Definition aomcx.h:1489
@ AV1E_SET_MODE_COST_UPD_FREQ
Control to set frequency of the cost updates for mode, unsigned int parameter.
Definition aomcx.h:1244
@ AV1E_SET_MAX_CONSEC_FRAME_DROP_MS_CBR
Codec control to set the maximum number of consecutive frame drops, in units of time (milliseconds),...
Definition aomcx.h:1570
@ AV1_GET_NEW_FRAME_IMAGE
Codec control function to get a pointer to the new frame.
Definition aom.h:70
const char * aom_codec_iface_name(aom_codec_iface_t *iface)
Return the name for a given interface.
enum aom_bit_depth aom_bit_depth_t
Bit depth for codec. This enumeration determines the bit depth of the codec.
aom_codec_err_t aom_codec_control(aom_codec_ctx_t *ctx, int ctrl_id,...)
Algorithm Control.
long aom_codec_flags_t
Initialization-time Feature Enabling.
Definition aom_codec.h:232
const struct aom_codec_iface aom_codec_iface_t
Codec interface structure.
Definition aom_codec.h:271
aom_codec_err_t aom_codec_destroy(aom_codec_ctx_t *ctx)
Destroy a codec instance.
const char * aom_codec_err_to_string(aom_codec_err_t err)
Convert error number to printable string.
aom_codec_err_t
Algorithm return codes.
Definition aom_codec.h:155
#define AOM_CODEC_CONTROL_TYPECHECKED(ctx, id, data)
aom_codec_control wrapper macro (adds type-checking, less flexible)
Definition aom_codec.h:542
const void * aom_codec_iter_t
Iterator.
Definition aom_codec.h:305
#define AOM_FRAME_IS_KEY
Definition aom_codec.h:288
@ AOM_BITS_8
Definition aom_codec.h:336
@ AOM_BITS_10
Definition aom_codec.h:337
@ AOM_CODEC_INVALID_PARAM
An application-supplied parameter is not valid.
Definition aom_codec.h:200
@ AOM_CODEC_MEM_ERROR
Memory operation failed.
Definition aom_codec.h:163
@ AOM_CODEC_OK
Operation completed without error.
Definition aom_codec.h:157
aom_codec_err_t aom_codec_decode(aom_codec_ctx_t *ctx, const uint8_t *data, size_t data_sz, void *user_priv)
Decode data.
#define aom_codec_dec_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_dec_init_ver()
Definition aom_decoder.h:129
const aom_codec_cx_pkt_t * aom_codec_get_cx_data(aom_codec_ctx_t *ctx, aom_codec_iter_t *iter)
Encoded data iterator.
aom_codec_err_t aom_codec_encode(aom_codec_ctx_t *ctx, const aom_image_t *img, aom_codec_pts_t pts, unsigned long duration, aom_enc_frame_flags_t flags)
Encode a frame.
#define aom_codec_enc_init(ctx, iface, cfg, flags)
Convenience macro for aom_codec_enc_init_ver()
Definition aom_encoder.h:943
aom_codec_err_t aom_codec_enc_config_default(aom_codec_iface_t *iface, aom_codec_enc_cfg_t *cfg, unsigned int usage)
Get the default configuration for a usage.
#define AOM_USAGE_REALTIME
usage parameter analogous to AV1 REALTIME mode.
Definition aom_encoder.h:1016
#define AOM_CODEC_USE_HIGHBITDEPTH
Definition aom_encoder.h:80
#define AOM_CODEC_USE_PSNR
Initialization-time Feature Enabling.
Definition aom_encoder.h:79
@ AOM_CBR
Definition aom_encoder.h:187
@ AOM_KF_AUTO
Definition aom_encoder.h:202
@ AOM_CODEC_PSNR_PKT
Definition aom_encoder.h:113
@ AOM_CODEC_CX_FRAME_PKT
Definition aom_encoder.h:110
aom active region map
Definition aomcx.h:1626
unsigned int rows
Definition aomcx.h:1629
unsigned int cols
Definition aomcx.h:1630
unsigned char * active_map
specify an on (1) or off (0) each 16x16 region within a frame
Definition aomcx.h:1628
Codec context structure.
Definition aom_codec.h:315
Encoder output packet.
Definition aom_encoder.h:122
size_t sz
Definition aom_encoder.h:127
enum aom_codec_cx_pkt_kind kind
Definition aom_encoder.h:123
double psnr[4]
Definition aom_encoder.h:145
union aom_codec_cx_pkt::@1 data
struct aom_codec_cx_pkt::@1::@2 frame
aom_codec_frame_flags_t flags
Definition aom_encoder.h:132
void * buf
Definition aom_encoder.h:126
Encoder configuration structure.
Definition aom_encoder.h:387
unsigned int g_input_bit_depth
Bit-depth of the input frames.
Definition aom_encoder.h:475
unsigned int rc_dropframe_thresh
Temporal resampling configuration, if supported by the codec.
Definition aom_encoder.h:540
struct aom_rational g_timebase
Stream timebase units.
Definition aom_encoder.h:489
unsigned int g_usage
Algorithm specific "usage" value.
Definition aom_encoder.h:399
unsigned int rc_buf_sz
Decoder Buffer Size.
Definition aom_encoder.h:705
unsigned int g_h
Height of the frame.
Definition aom_encoder.h:435
enum aom_kf_mode kf_mode
Keyframe placement mode.
Definition aom_encoder.h:768
enum aom_rc_mode rc_end_usage
Rate control algorithm to use.
Definition aom_encoder.h:623
unsigned int g_threads
Maximum number of threads to use.
Definition aom_encoder.h:407
unsigned int kf_min_dist
Keyframe minimum interval.
Definition aom_encoder.h:777
unsigned int g_lag_in_frames
Allow lagged encoding.
Definition aom_encoder.h:518
unsigned int rc_buf_initial_sz
Decoder Buffer Initial Size.
Definition aom_encoder.h:714
unsigned int g_profile
Bitstream profile to use.
Definition aom_encoder.h:417
aom_bit_depth_t g_bit_depth
Bit-depth of the codec.
Definition aom_encoder.h:467
unsigned int g_w
Width of the frame.
Definition aom_encoder.h:426
unsigned int rc_undershoot_pct
Rate control adaptation undershoot control.
Definition aom_encoder.h:681
unsigned int kf_max_dist
Keyframe maximum interval.
Definition aom_encoder.h:786
aom_codec_er_flags_t g_error_resilient
Enable error resilient modes.
Definition aom_encoder.h:497
unsigned int rc_max_quantizer
Maximum (Worst Quality) Quantizer.
Definition aom_encoder.h:668
unsigned int rc_buf_optimal_sz
Decoder Buffer Optimal Size.
Definition aom_encoder.h:723
unsigned int rc_min_quantizer
Minimum (Best Quality) Quantizer.
Definition aom_encoder.h:658
unsigned int rc_target_bitrate
Target data rate.
Definition aom_encoder.h:644
unsigned int rc_resize_mode
Mode for spatial resampling, if supported by the codec.
Definition aom_encoder.h:549
unsigned int rc_overshoot_pct
Rate control adaptation overshoot control.
Definition aom_encoder.h:690
Image Descriptor.
Definition aom_image.h:182
aom_img_fmt_t fmt
Definition aom_image.h:183
unsigned int d_w
Definition aom_image.h:197
unsigned int d_h
Definition aom_image.h:198
int num
Definition aom_encoder.h:165
int den
Definition aom_encoder.h:166
aom image scaling mode
Definition aomcx.h:1638
Definition aomcx.h:1697
int temporal_layer_id
Definition aomcx.h:1699
int spatial_layer_id
Definition aomcx.h:1698
Definition aomcx.h:1708
int max_quantizers[32]
Definition aomcx.h:1711
int number_spatial_layers
Definition aomcx.h:1709
int layer_target_bitrate[32]
Definition aomcx.h:1716
int framerate_factor[8]
Definition aomcx.h:1718
int min_quantizers[32]
Definition aomcx.h:1712
int scaling_factor_den[4]
Definition aomcx.h:1714
int number_temporal_layers
Definition aomcx.h:1710
int scaling_factor_num[4]
Definition aomcx.h:1713
Definition aomcx.h:1745
int use_comp_pred[3]
Definition aomcx.h:1748
Definition aomcx.h:1722
int reference[7]
Definition aomcx.h:1738
int refresh[8]
Definition aomcx.h:1741
int ref_idx[7]
Definition aomcx.h:1740