summaryrefslogtreecommitdiffstats
path: root/io_uring/rsrc.c
diff options
context:
space:
mode:
Diffstat (limited to 'io_uring/rsrc.c')
-rw-r--r--io_uring/rsrc.c149
1 files changed, 108 insertions, 41 deletions
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 453867add7ca..e8639993b61e 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -855,6 +855,98 @@ static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
return ret;
}
+static bool io_do_coalesce_buffer(struct page ***pages, int *nr_pages,
+ struct io_imu_folio_data *data, int nr_folios)
+{
+ struct page **page_array = *pages, **new_array = NULL;
+ int nr_pages_left = *nr_pages, i, j;
+
+ /* Store head pages only*/
+ new_array = kvmalloc_array(nr_folios, sizeof(struct page *),
+ GFP_KERNEL);
+ if (!new_array)
+ return false;
+
+ new_array[0] = compound_head(page_array[0]);
+ /*
+ * The pages are bound to the folio, it doesn't
+ * actually unpin them but drops all but one reference,
+ * which is usually put down by io_buffer_unmap().
+ * Note, needs a better helper.
+ */
+ if (data->nr_pages_head > 1)
+ unpin_user_pages(&page_array[1], data->nr_pages_head - 1);
+
+ j = data->nr_pages_head;
+ nr_pages_left -= data->nr_pages_head;
+ for (i = 1; i < nr_folios; i++) {
+ unsigned int nr_unpin;
+
+ new_array[i] = page_array[j];
+ nr_unpin = min_t(unsigned int, nr_pages_left - 1,
+ data->nr_pages_mid - 1);
+ if (nr_unpin)
+ unpin_user_pages(&page_array[j+1], nr_unpin);
+ j += data->nr_pages_mid;
+ nr_pages_left -= data->nr_pages_mid;
+ }
+ kvfree(page_array);
+ *pages = new_array;
+ *nr_pages = nr_folios;
+ return true;
+}
+
+static bool io_try_coalesce_buffer(struct page ***pages, int *nr_pages,
+ struct io_imu_folio_data *data)
+{
+ struct page **page_array = *pages;
+ struct folio *folio = page_folio(page_array[0]);
+ unsigned int count = 1, nr_folios = 1;
+ int i;
+
+ if (*nr_pages <= 1)
+ return false;
+
+ data->nr_pages_mid = folio_nr_pages(folio);
+ if (data->nr_pages_mid == 1)
+ return false;
+
+ data->folio_shift = folio_shift(folio);
+ /*
+ * Check if pages are contiguous inside a folio, and all folios have
+ * the same page count except for the head and tail.
+ */
+ for (i = 1; i < *nr_pages; i++) {
+ if (page_folio(page_array[i]) == folio &&
+ page_array[i] == page_array[i-1] + 1) {
+ count++;
+ continue;
+ }
+
+ if (nr_folios == 1) {
+ if (folio_page_idx(folio, page_array[i-1]) !=
+ data->nr_pages_mid - 1)
+ return false;
+
+ data->nr_pages_head = count;
+ } else if (count != data->nr_pages_mid) {
+ return false;
+ }
+
+ folio = page_folio(page_array[i]);
+ if (folio_size(folio) != (1UL << data->folio_shift) ||
+ folio_page_idx(folio, page_array[i]) != 0)
+ return false;
+
+ count = 1;
+ nr_folios++;
+ }
+ if (nr_folios == 1)
+ data->nr_pages_head = count;
+
+ return io_do_coalesce_buffer(pages, nr_pages, data, nr_folios);
+}
+
static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
struct io_mapped_ubuf **pimu,
struct page **last_hpage)
@@ -864,7 +956,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
unsigned long off;
size_t size;
int ret, nr_pages, i;
- struct folio *folio = NULL;
+ struct io_imu_folio_data data;
+ bool coalesced;
*pimu = (struct io_mapped_ubuf *)&dummy_ubuf;
if (!iov->iov_base)
@@ -879,31 +972,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
goto done;
}
- /* If it's a huge page, try to coalesce them into a single bvec entry */
- if (nr_pages > 1) {
- folio = page_folio(pages[0]);
- for (i = 1; i < nr_pages; i++) {
- /*
- * Pages must be consecutive and on the same folio for
- * this to work
- */
- if (page_folio(pages[i]) != folio ||
- pages[i] != pages[i - 1] + 1) {
- folio = NULL;
- break;
- }
- }
- if (folio) {
- /*
- * The pages are bound to the folio, it doesn't
- * actually unpin them but drops all but one reference,
- * which is usually put down by io_buffer_unmap().
- * Note, needs a better helper.
- */
- unpin_user_pages(&pages[1], nr_pages - 1);
- nr_pages = 1;
- }
- }
+ /* If it's huge page(s), try to coalesce them into fewer bvec entries */
+ coalesced = io_try_coalesce_buffer(&pages, &nr_pages, &data);
imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
if (!imu)
@@ -915,23 +985,25 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
goto done;
}
- off = (unsigned long) iov->iov_base & ~PAGE_MASK;
size = iov->iov_len;
/* store original address for later verification */
imu->ubuf = (unsigned long) iov->iov_base;
imu->ubuf_end = imu->ubuf + iov->iov_len;
imu->nr_bvecs = nr_pages;
+ imu->folio_shift = PAGE_SHIFT;
+ imu->folio_mask = PAGE_MASK;
+ if (coalesced) {
+ imu->folio_shift = data.folio_shift;
+ imu->folio_mask = ~((1UL << data.folio_shift) - 1);
+ }
+ off = (unsigned long) iov->iov_base & ~imu->folio_mask;
*pimu = imu;
ret = 0;
- if (folio) {
- bvec_set_page(&imu->bvec[0], pages[0], size, off);
- goto done;
- }
for (i = 0; i < nr_pages; i++) {
size_t vec_len;
- vec_len = min_t(size_t, size, PAGE_SIZE - off);
+ vec_len = min_t(size_t, size, (1UL << imu->folio_shift) - off);
bvec_set_page(&imu->bvec[i], pages[i], vec_len, off);
off = 0;
size -= vec_len;
@@ -1042,23 +1114,18 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
* we know that:
*
* 1) it's a BVEC iter, we set it up
- * 2) all bvecs are PAGE_SIZE in size, except potentially the
+ * 2) all bvecs are the same in size, except potentially the
* first and last bvec
*
* So just find our index, and adjust the iterator afterwards.
* If the offset is within the first bvec (or the whole first
* bvec, just use iov_iter_advance(). This makes it easier
* since we can just skip the first segment, which may not
- * be PAGE_SIZE aligned.
+ * be folio_size aligned.
*/
const struct bio_vec *bvec = imu->bvec;
if (offset < bvec->bv_len) {
- /*
- * Note, huge pages buffers consists of one large
- * bvec entry and should always go this way. The other
- * branch doesn't expect non PAGE_SIZE'd chunks.
- */
iter->bvec = bvec;
iter->count -= offset;
iter->iov_offset = offset;
@@ -1067,12 +1134,12 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
/* skip first vec */
offset -= bvec->bv_len;
- seg_skip = 1 + (offset >> PAGE_SHIFT);
+ seg_skip = 1 + (offset >> imu->folio_shift);
iter->bvec = bvec + seg_skip;
iter->nr_segs -= seg_skip;
iter->count -= bvec->bv_len + offset;
- iter->iov_offset = offset & ~PAGE_MASK;
+ iter->iov_offset = offset & ~imu->folio_mask;
}
}