summaryrefslogtreecommitdiff
path: root/mali_kbase/mali_kbase_reset_gpu.h
blob: 5063b64ca979412a24b6abd7aca151b5581be642 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
 *
 * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

/*
 * GPU reset interface: declarations for preventing and re-allowing GPU resets
 * around HW accesses, preparing and triggering a reset, and querying or
 * waiting on the reset state.
 */
#ifndef _KBASE_RESET_GPU_H_
#define _KBASE_RESET_GPU_H_

/**
 * kbase_reset_gpu_prevent_and_wait - Prevent GPU resets from starting whilst
 *                                    the current thread is accessing the GPU,
 *                                    and wait for any in-flight reset to
 *                                    finish.
 * @kbdev: Device pointer
 *
 * This should be used when a potential access to the HW is going to be made
 * from a non-atomic context.
 *
 * It will wait for any in-flight reset to finish before returning. Hence,
 * correct lock ordering must be observed with respect to the calling thread
 * and the reset worker thread. On codepaths that the reset worker itself
 * waits on, use kbase_reset_gpu_try_prevent() instead, as this function would
 * deadlock there.
 *
 * This does not synchronize general access to the HW, and so multiple threads
 * can prevent GPU reset concurrently, whilst not being serialized. This is
 * advantageous as the threads can make this call at points where they do not
 * know for sure yet whether they will indeed access the GPU (for example, to
 * respect lock ordering), without unnecessarily blocking others.
 *
 * Threads must still use other synchronization to ensure they access the HW
 * consistently, at a point where they are certain it needs to be accessed.
 *
 * On success, the calling thread must call kbase_reset_gpu_allow() once it has
 * finished accessing the GPU, so that resets are allowed to happen again.
 *
 * This may return a failure in cases such as a previous failure to reset the
 * GPU within a reasonable time. If that happens, the GPU might be
 * non-operational and the caller should not attempt any further access.
 *
 * Note:
 * For atomic context, instead check kbase_reset_gpu_is_active().
 *
 * Return: 0 on success, or negative error code on failure.
 */
int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_try_prevent - Attempt to prevent GPU resets from starting
 *                               whilst the current thread is accessing the
 *                               GPU, unless a reset is already in progress.
 * @kbdev: Device pointer
 *
 * Similar to kbase_reset_gpu_prevent_and_wait(), but it does not wait for an
 * existing reset to complete. This can be used on codepaths that the reset
 * worker waits on, where use of kbase_reset_gpu_prevent_and_wait() would
 * otherwise deadlock.
 *
 * Instead, a reset that is currently happening will cause this function to
 * return an error code indicating that, and further resets will not have been
 * prevented.
 *
 * In such cases, the caller must check for -EAGAIN, and take similar actions
 * as for handling reset in atomic context. That is, they must cancel any
 * actions that depended on reset being prevented, possibly deferring them
 * until after the reset.
 *
 * Otherwise a successful return means that the caller can continue its actions
 * safely in the knowledge that reset is prevented, and the reset worker will
 * correctly wait instead of deadlocking against this thread.
 *
 * On success, the calling thread must call kbase_reset_gpu_allow() once it has
 * finished accessing the GPU, so that resets are allowed to happen again.
 *
 * Refer to kbase_reset_gpu_prevent_and_wait() for more information.
 *
 * Return:
 * * 0 - on success.
 * * -EAGAIN - if a reset is currently happening.
 * * Other negative error codes on failure, where -ENOMEM indicates that GPU
 *   reset had failed.
 */
int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_allow - Allow GPU resets to happen again after having been
 *                         previously prevented.
 * @kbdev: Device pointer
 *
 * This should be used when a potential access to the HW has finished from a
 * non-atomic context.
 *
 * It must be used from the same thread that originally made the previous
 * successful call to kbase_reset_gpu_prevent_and_wait() or
 * kbase_reset_gpu_try_prevent(). It must not be deferred to another thread.
 */
void kbase_reset_gpu_allow(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_assert_prevented - Make debugging checks that GPU reset is
 *                                    currently prevented by the current
 *                                    thread.
 * @kbdev: Device pointer
 *
 * Make debugging checks that the current thread has made a call to
 * kbase_reset_gpu_prevent_and_wait() which has not yet been followed by a
 * subsequent call to kbase_reset_gpu_allow().
 *
 * Proving that reset is indeed prevented requires CONFIG_LOCKDEP. Without it,
 * only limited debugging checks can be made.
 */
void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_assert_failed_or_prevented - Make debugging checks that
 *                                              either GPU reset previously
 *                                              failed, or is currently
 *                                              prevented.
 * @kbdev: Device pointer
 *
 * As with kbase_reset_gpu_assert_prevented(), but also allow for paths where
 * reset was not prevented due to a failure, yet the cleanup code that follows
 * still needs to be executed.
 *
 * Cleanup code following this call must handle any inconsistent state modified
 * by the failed GPU reset, and must time out any blocking operations instead
 * of waiting forever.
 */
void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_failed - Return whether a previous GPU reset failed.
 * @kbdev: Device pointer
 *
 * Return: true if a previous GPU reset failed, false otherwise.
 */
bool kbase_reset_gpu_failed(struct kbase_device *kbdev);

/**
 * RESET_FLAGS_NONE - Empty flag set for kbase_prepare_to_reset_gpu()
 *
 * The RESET_FLAGS_* defines below are the individual bits that may be set in
 * the flags argument instead.
 */
#define RESET_FLAGS_NONE (0U)

/* HW counter logic should treat this reset as an unrecoverable error */
#define RESET_FLAGS_HWC_UNRECOVERABLE_ERROR (1U << 0)

/* pixel: Power-cycle the GPU rather than attempting a soft/hard reset (only used on CSF hw). */
#define RESET_FLAGS_FORCE_PM_HW_RESET (1U << 1)

/**
 * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU.
 * @kbdev: Device pointer
 * @flags: Bitfield indicating impact of reset (see the RESET_FLAGS_* defines)
 *
 * Context: Caller is expected to hold the kbdev->hwaccess_lock.
 *
 * Return: a boolean which should be interpreted as follows:
 * * true  - Prepared for reset, kbase_reset_gpu() should be called.
 * * false - Another thread is performing a reset, kbase_reset_gpu() should
 *           not be called.
 */
bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev,
				       unsigned int flags);

/**
 * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
 * @kbdev: Device pointer
 * @flags: Bitfield indicating impact of reset (see the RESET_FLAGS_* defines)
 *
 * Return: a boolean which should be interpreted as follows:
 * * true  - Prepared for reset, kbase_reset_gpu() should be called.
 * * false - Another thread is performing a reset, kbase_reset_gpu() should
 *           not be called.
 */
bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags);

/**
 * kbase_reset_gpu - Reset the GPU
 * @kbdev: Device pointer
 *
 * This function should be called after kbase_prepare_to_reset_gpu() if it
 * returns true. It should never be called without a corresponding call to
 * kbase_prepare_to_reset_gpu() (only on Job Manager GPUs).
 *
 * After this function is called, the caller should call kbase_reset_gpu_wait()
 * to know when the reset has completed.
 */
void kbase_reset_gpu(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_locked - Reset the GPU
 * @kbdev: Device pointer
 *
 * This function should be called after kbase_prepare_to_reset_gpu_locked() if
 * it returns true. It should never be called without a corresponding call to
 * kbase_prepare_to_reset_gpu() (only on Job Manager GPUs).
 *
 * After this function is called, the caller should call kbase_reset_gpu_wait()
 * to know when the reset has completed.
 *
 * Context: Caller is expected to hold the kbdev->hwaccess_lock.
 */
void kbase_reset_gpu_locked(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_silent - Reset the GPU silently
 * @kbdev: Device pointer
 *
 * Reset the GPU without trying to cancel jobs (applicable to Job Manager GPUs)
 * and without emitting messages into the kernel log while doing the reset.
 *
 * This function should be used in cases where we are doing a controlled reset
 * of the GPU as part of normal processing (e.g. exiting protected mode) where
 * the driver will have ensured the scheduler has been idled and all other
 * users of the GPU (e.g. instrumentation) have been suspended.
 *
 * Return:
 * * 0 - if the reset was started successfully.
 * * -EAGAIN - if another reset is currently in progress.
 */
int kbase_reset_gpu_silent(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_is_active - Reports if the GPU is being reset
 * @kbdev: Device pointer
 *
 * Any changes made to the HW when this returns true may be lost, overwritten
 * or corrupted.
 *
 * Note that unless appropriate locks are held when using this function, the
 * state could change immediately afterwards.
 *
 * Return: true if the GPU is in the process of being reset, false otherwise.
 */
bool kbase_reset_gpu_is_active(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_is_not_pending - Reports if the GPU reset isn't pending
 * @kbdev: Device pointer
 *
 * Note that unless appropriate locks are held when using this function, the
 * state could change immediately afterwards.
 *
 * Return: true if the GPU reset isn't pending, false otherwise.
 */
bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_wait - Wait for a GPU reset to complete
 * @kbdev: Device pointer
 *
 * Callers of kbase_reset_gpu() and kbase_reset_gpu_locked() are expected to
 * use this function to learn when the reset has completed.
 *
 * This function may wait indefinitely.
 *
 * Return: 0 if successful or a negative error code on failure.
 */
int kbase_reset_gpu_wait(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_init - Initialize the GPU reset handling mechanism.
 * @kbdev: Device pointer
 *
 * Return: 0 if successful or a negative error code on failure.
 */
int kbase_reset_gpu_init(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_term - Terminate the GPU reset handling mechanism.
 * @kbdev: Device pointer
 */
void kbase_reset_gpu_term(struct kbase_device *kbdev);

#endif /* _KBASE_RESET_GPU_H_ */