Line data Source code
1 : /*
2 : * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 : *
4 : * This source code is subject to the terms of the BSD 2 Clause License and
5 : * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 : * was not distributed with this source code in the LICENSE file, you can
7 : * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 : * Media Patent License 1.0 was not distributed with this source code in the
9 : * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 : */
11 :
12 : #include "av1/encoder/encodeframe.h"
13 : #include "av1/encoder/encoder.h"
14 : #include "av1/encoder/ethread.h"
15 : #include "aom_dsp/aom_dsp_common.h"
16 :
17 0 : static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
18 : int i, j, k, l, m, n;
19 :
20 0 : for (i = 0; i < REFERENCE_MODES; i++)
21 0 : td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i];
22 :
23 : #if CONFIG_GLOBAL_MOTION
24 0 : for (i = 0; i < TOTAL_REFS_PER_FRAME; i++)
25 0 : td->rd_counts.global_motion_used[i] +=
26 0 : td_t->rd_counts.global_motion_used[i];
27 : #endif // CONFIG_GLOBAL_MOTION
28 :
29 0 : for (i = 0; i < TX_SIZES; i++)
30 0 : for (j = 0; j < PLANE_TYPES; j++)
31 0 : for (k = 0; k < REF_TYPES; k++)
32 0 : for (l = 0; l < COEF_BANDS; l++)
33 0 : for (m = 0; m < COEFF_CONTEXTS; m++)
34 0 : for (n = 0; n < ENTROPY_TOKENS; n++)
35 0 : td->rd_counts.coef_counts[i][j][k][l][m][n] +=
36 0 : td_t->rd_counts.coef_counts[i][j][k][l][m][n];
37 0 : }
38 :
39 0 : static int enc_worker_hook(EncWorkerData *const thread_data, void *unused) {
40 0 : AV1_COMP *const cpi = thread_data->cpi;
41 0 : const AV1_COMMON *const cm = &cpi->common;
42 0 : const int tile_cols = cm->tile_cols;
43 0 : const int tile_rows = cm->tile_rows;
44 : int t;
45 :
46 : (void)unused;
47 :
48 0 : for (t = thread_data->start; t < tile_rows * tile_cols;
49 0 : t += cpi->num_workers) {
50 0 : int tile_row = t / tile_cols;
51 0 : int tile_col = t % tile_cols;
52 :
53 0 : av1_encode_tile(cpi, thread_data->td, tile_row, tile_col);
54 : }
55 :
56 0 : return 0;
57 : }
58 :
59 0 : void av1_encode_tiles_mt(AV1_COMP *cpi) {
60 0 : AV1_COMMON *const cm = &cpi->common;
61 0 : const int tile_cols = cm->tile_cols;
62 0 : const AVxWorkerInterface *const winterface = aom_get_worker_interface();
63 0 : const int num_workers = AOMMIN(cpi->oxcf.max_threads, tile_cols);
64 : int i;
65 :
66 0 : av1_init_tile_data(cpi);
67 :
68 : // Only run once to create threads and allocate thread data.
69 0 : if (cpi->num_workers == 0) {
70 0 : CHECK_MEM_ERROR(cm, cpi->workers,
71 : aom_malloc(num_workers * sizeof(*cpi->workers)));
72 :
73 0 : CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
74 : aom_calloc(num_workers, sizeof(*cpi->tile_thr_data)));
75 :
76 0 : for (i = 0; i < num_workers; i++) {
77 0 : AVxWorker *const worker = &cpi->workers[i];
78 0 : EncWorkerData *const thread_data = &cpi->tile_thr_data[i];
79 :
80 0 : ++cpi->num_workers;
81 0 : winterface->init(worker);
82 :
83 0 : thread_data->cpi = cpi;
84 :
85 0 : if (i < num_workers - 1) {
86 : // Allocate thread data.
87 0 : CHECK_MEM_ERROR(cm, thread_data->td,
88 : aom_memalign(32, sizeof(*thread_data->td)));
89 0 : av1_zero(*thread_data->td);
90 :
91 : // Set up pc_tree.
92 0 : thread_data->td->leaf_tree = NULL;
93 0 : thread_data->td->pc_tree = NULL;
94 0 : av1_setup_pc_tree(cm, thread_data->td);
95 :
96 : #if CONFIG_MOTION_VAR
97 : #if CONFIG_HIGHBITDEPTH
98 0 : int buf_scaler = 2;
99 : #else
100 : int buf_scaler = 1;
101 : #endif
102 0 : CHECK_MEM_ERROR(cm, thread_data->td->above_pred_buf,
103 : (uint8_t *)aom_memalign(
104 : 16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE *
105 : sizeof(*thread_data->td->above_pred_buf)));
106 0 : CHECK_MEM_ERROR(cm, thread_data->td->left_pred_buf,
107 : (uint8_t *)aom_memalign(
108 : 16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE *
109 : sizeof(*thread_data->td->left_pred_buf)));
110 0 : CHECK_MEM_ERROR(
111 : cm, thread_data->td->wsrc_buf,
112 : (int32_t *)aom_memalign(
113 : 16, MAX_SB_SQUARE * sizeof(*thread_data->td->wsrc_buf)));
114 0 : CHECK_MEM_ERROR(
115 : cm, thread_data->td->mask_buf,
116 : (int32_t *)aom_memalign(
117 : 16, MAX_SB_SQUARE * sizeof(*thread_data->td->mask_buf)));
118 : #endif
119 : // Allocate frame counters in thread data.
120 0 : CHECK_MEM_ERROR(cm, thread_data->td->counts,
121 : aom_calloc(1, sizeof(*thread_data->td->counts)));
122 :
123 : #if CONFIG_PALETTE
124 : // Allocate buffers used by palette coding mode.
125 0 : if (cpi->common.allow_screen_content_tools) {
126 0 : CHECK_MEM_ERROR(
127 : cm, thread_data->td->palette_buffer,
128 : aom_memalign(16, sizeof(*thread_data->td->palette_buffer)));
129 : }
130 : #endif // CONFIG_PALETTE
131 :
132 : // Create threads
133 0 : if (!winterface->reset(worker))
134 0 : aom_internal_error(&cm->error, AOM_CODEC_ERROR,
135 : "Tile encoder thread creation failed");
136 : } else {
137 : // Main thread acts as a worker and uses the thread data in cpi.
138 0 : thread_data->td = &cpi->td;
139 : }
140 :
141 0 : winterface->sync(worker);
142 : }
143 : }
144 :
145 0 : for (i = 0; i < num_workers; i++) {
146 0 : AVxWorker *const worker = &cpi->workers[i];
147 : EncWorkerData *thread_data;
148 :
149 0 : worker->hook = (AVxWorkerHook)enc_worker_hook;
150 0 : worker->data1 = &cpi->tile_thr_data[i];
151 0 : worker->data2 = NULL;
152 0 : thread_data = (EncWorkerData *)worker->data1;
153 :
154 : // Before encoding a frame, copy the thread data from cpi.
155 0 : if (thread_data->td != &cpi->td) {
156 0 : thread_data->td->mb = cpi->td.mb;
157 0 : thread_data->td->rd_counts = cpi->td.rd_counts;
158 : #if CONFIG_MOTION_VAR
159 0 : thread_data->td->mb.above_pred_buf = thread_data->td->above_pred_buf;
160 0 : thread_data->td->mb.left_pred_buf = thread_data->td->left_pred_buf;
161 0 : thread_data->td->mb.wsrc_buf = thread_data->td->wsrc_buf;
162 0 : thread_data->td->mb.mask_buf = thread_data->td->mask_buf;
163 : #endif
164 : }
165 0 : if (thread_data->td->counts != &cpi->common.counts) {
166 0 : memcpy(thread_data->td->counts, &cpi->common.counts,
167 : sizeof(cpi->common.counts));
168 : }
169 :
170 : #if CONFIG_PALETTE
171 0 : if (cpi->common.allow_screen_content_tools && i < num_workers - 1)
172 0 : thread_data->td->mb.palette_buffer = thread_data->td->palette_buffer;
173 : #endif // CONFIG_PALETTE
174 : }
175 :
176 : // Encode a frame
177 0 : for (i = 0; i < num_workers; i++) {
178 0 : AVxWorker *const worker = &cpi->workers[i];
179 0 : EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
180 :
181 : // Set the starting tile for each thread.
182 0 : thread_data->start = i;
183 :
184 0 : if (i == cpi->num_workers - 1)
185 0 : winterface->execute(worker);
186 : else
187 0 : winterface->launch(worker);
188 : }
189 :
190 : // Encoding ends.
191 0 : for (i = 0; i < num_workers; i++) {
192 0 : AVxWorker *const worker = &cpi->workers[i];
193 0 : winterface->sync(worker);
194 : }
195 :
196 0 : for (i = 0; i < num_workers; i++) {
197 0 : AVxWorker *const worker = &cpi->workers[i];
198 0 : EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
199 :
200 : // Accumulate counters.
201 0 : if (i < cpi->num_workers - 1) {
202 0 : av1_accumulate_frame_counts(&cm->counts, thread_data->td->counts);
203 0 : accumulate_rd_opt(&cpi->td, thread_data->td);
204 : #if CONFIG_VAR_TX
205 0 : cpi->td.mb.txb_split_count += thread_data->td->mb.txb_split_count;
206 : #endif
207 : }
208 : }
209 0 : }
|