summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gem/i915_gem_busy.c
blob: 7358bebef15c1a8691e8902dcca7ce2c85f48b47 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2016 Intel Corporation
 */

#include <linux/dma-fence-array.h>

#include "gt/intel_engine.h"

#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"

static __always_inline u32 __busy_read_flag(u16 id)
{
	if (id == (u16)I915_ENGINE_CLASS_INVALID)
		return 0xffff0000u;

	GEM_BUG_ON(id >= 16);
	return 0x10000u << id;
}

static __always_inline u32 __busy_write_id(u16 id)
{
	/*
	 * The uABI guarantees an active writer is also amongst the read
	 * engines. This would be true if we accessed the activity tracking
	 * under the lock, but as we perform the lookup of the object and
	 * its activity locklessly we can not guarantee that the last_write
	 * being active implies that we have set the same engine flag from
	 * last_read - hence we always set both read and write busy for
	 * last_write.
	 */
	if (id == (u16)I915_ENGINE_CLASS_INVALID)
		return 0xffffffffu;

	return (id + 1) | __busy_read_flag(id);
}

static __always_inline unsigned int
__busy_set_if_active(struct dma_fence *fence, u32 (*flag)(u16 id))
{
	const struct i915_request *rq;

	/*
	 * We have to check the current hw status of the fence as the uABI
	 * guarantees forward progress. We could rely on the idle worker
	 * to eventually flush us, but to minimise latency just ask the
	 * hardware.
	 *
	 * Note we only report on the status of native fences and we currently
	 * have two native fences:
	 *
	 * 1. A composite fence (dma_fence_array) constructed of i915 requests
	 * created during a parallel submission. In this case we deconstruct the
	 * composite fence into individual i915 requests and check the status of
	 * each request.
	 *
	 * 2. A single i915 request.
	 */
	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);
		struct dma_fence **child = array->fences;
		unsigned int nchild = array->num_fences;

		do {
			struct dma_fence *current_fence = *child++;

			/* Not an i915 fence, can't be busy per above */
			if (!dma_fence_is_i915(current_fence) ||
			    !test_bit(I915_FENCE_FLAG_COMPOSITE,
				      &current_fence->flags)) {
				return 0;
			}

			rq = to_request(current_fence);
			if (!i915_request_completed(rq))
				return flag(rq->engine->uabi_class);
		} while (--nchild);

		/* All requests in array complete, not busy */
		return 0;
	} else {
		if (!dma_fence_is_i915(fence))
			return 0;

		rq = to_request(fence);
		if (i915_request_completed(rq))
			return 0;

		/* Beware type-expansion follies! */
		BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class));
		return flag(rq->engine->uabi_class);
	}
}

static __always_inline unsigned int
busy_check_reader(struct dma_fence *fence)
{
	return __busy_set_if_active(fence, __busy_read_flag);
}

static __always_inline unsigned int
busy_check_writer(struct dma_fence *fence)
{
	if (!fence)
		return 0;

	return __busy_set_if_active(fence, __busy_write_id);
}

int
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_busy *args = data;
	struct drm_i915_gem_object *obj;
	struct dma_resv_list *list;
	unsigned int seq;
	int err;

	err = -ENOENT;
	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
	if (!obj)
		goto out;

	/*
	 * A discrepancy here is that we do not report the status of
	 * non-i915 fences, i.e. even though we may report the object as idle,
	 * a call to set-domain may still stall waiting for foreign rendering.
	 * This also means that wait-ioctl may report an object as busy,
	 * where busy-ioctl considers it idle.
	 *
	 * We trade the ability to warn of foreign fences to report on which
	 * i915 engines are active for the object.
	 *
	 * Alternatively, we can trade that extra information on read/write
	 * activity with
	 *	args->busy =
	 *		!dma_resv_test_signaled(obj->resv, true);
	 * to report the overall busyness. This is what the wait-ioctl does.
	 *
	 */
retry:
	seq = raw_read_seqcount(&obj->base.resv->seq);

	/* Translate the exclusive fence to the READ *and* WRITE engine */
	args->busy = busy_check_writer(dma_resv_excl_fence(obj->base.resv));

	/* Translate shared fences to READ set of engines */
	list = dma_resv_shared_list(obj->base.resv);
	if (list) {
		unsigned int shared_count = list->shared_count, i;

		for (i = 0; i < shared_count; ++i) {
			struct dma_fence *fence =
				rcu_dereference(list->shared[i]);

			args->busy |= busy_check_reader(fence);
		}
	}

	if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq))
		goto retry;

	err = 0;
out:
	rcu_read_unlock();
	return err;
}