[PATCH for-next 0/9] LLVM coverage support for Xen

Hello,

The following patch series enables LLVM coverage support for the Xen
hypervisor. This first patches are a re-organization of the gcov
support, in order to make the support generic for all coverage
technologies. This is mostly a name change from gcov -> cov in several
places and files, together with the addition of a Kconfig option in
order to enable LLVM coverage.

Patch 7 introduces the actual LLVM coverage support code that allows
fetching the coverage data from Xen. Finally patch 9 adds the
documentation on how to use this feature.

A sample coverage report obtained after booting a PVHv2 Dom0 can be
found at:

http://xenbits.xen.org/people/royger/xen_profile/

Thanks, Roger.

Roger Pau Monne (9):
  gcov: return ENOSYS for unimplemented gcov domctl
  gcov: rename folder and header to coverage
  gcov: rename sysctl and functions
  gcov: introduce hooks for the sysctl
  coverage: introduce generic file
  kconfig: add llvm coverage option
  coverage: introduce support for llvm profiling
  xsm: add bodge when compiling with llvm coverage support
  coverage: add documentation for LLVM coverage

docs/misc/coverage.markdown | 47 ++++++++++
tools/misc/xencov.c | 28 +++---
xen/Kconfig.debug | 16 ++++
xen/Rules.mk | 4 +
xen/common/Makefile | 2 +-
xen/common/{gcov => coverage}/Makefile | 6 +-
xen/common/coverage/coverage.c | 71 ++++++++++++++
xen/common/{gcov => coverage}/gcc_3_4.c | 0
xen/common/{gcov => coverage}/gcc_4_7.c | 0
xen/common/{gcov => coverage}/gcc_4_9.c | 0
xen/common/{gcov => coverage}/gcc_5.c | 0
xen/common/{gcov => coverage}/gcc_7.c | 0
xen/common/{gcov => coverage}/gcov.c | 44 ++-------
xen/common/{gcov => coverage}/gcov.h | 0
xen/common/{gcov => coverage}/gcov_base.c | 0
xen/common/coverage/llvm.c | 148 ++++++++++++++++++++++++++++++
xen/common/sysctl.c | 8 +-
xen/include/public/sysctl.h | 18 ++--
xen/include/xen/coverage.h | 17 ++++
xen/include/xen/gcov.h | 9 --
xen/include/xsm/dummy.h | 14 +++
21 files changed, 359 insertions(+), 73 deletions(-)
rename xen/common/{gcov => coverage}/Makefile (82%)
create mode 100644 xen/common/coverage/coverage.c
rename xen/common/{gcov => coverage}/gcc_3_4.c (100%)
rename xen/common/{gcov => coverage}/gcc_4_7.c (100%)
rename xen/common/{gcov => coverage}/gcc_4_9.c (100%)
rename xen/common/{gcov => coverage}/gcc_5.c (100%)
rename xen/common/{gcov => coverage}/gcc_7.c (100%)
rename xen/common/{gcov => coverage}/gcov.c (88%)
rename xen/common/{gcov => coverage}/gcov.h (100%)
rename xen/common/{gcov => coverage}/gcov_base.c (100%)
create mode 100644 xen/common/coverage/llvm.c
create mode 100644 xen/include/xen/coverage.h
delete mode 100644 xen/include/xen/gcov.h

Introduce the functionality in order to fill the hooks of the
cov_sysctl_ops struct.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>

--- /dev/null
+++ b/xen/common/coverage/llvm.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2017 Citrix Systems R&D
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <xen/errno.h>
+#include <xen/guest_access.h>
+#include <xen/types.h>
+#include <xen/coverage.h>
+
+#ifndef __clang__
+#error "LLVM coverage selected without clang compiler"
+#endif
+
+#define LLVM_PROFILE_MAGIC_64 (uint64_t)255 << 56 | (uint64_t)'l' << 48 | \
+ (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | (uint64_t)'o' << 24 | \
+ (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129
+#define LLVM_PROFILE_MAGIC_32 (uint64_t)255 << 56 | (uint64_t)'l' << 48 | \
+ (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | (uint64_t)'o' << 24 | \
+ (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129

Judging by the file having Xen style, I imply this isn't intended to
very closely match some other original. With that, parentheses
should be added around all the shift operations above.

+#if __clang_major__ >= 4 || (__clang_major__ == 3 && __clang_minor__ == 9)

Is it certain that there's never going to be a 3.10? Otherwise

= might be more suitable for the minor version check.

+int __llvm_profile_runtime;

This isn't used anywhere - please add a brief comment explaining
the need for it (the more that its type being plain int is also
suspicious).

+extern struct llvm_profile_data __start___llvm_prf_data;
+extern struct llvm_profile_data __stop___llvm_prf_data;
+extern uint64_t __start___llvm_prf_cnts;
+extern uint64_t __stop___llvm_prf_cnts;
+extern char __start___llvm_prf_names;
+extern char __stop___llvm_prf_names;

I guess all of these really want to have added, making ...

+static void *start_data = &__start___llvm_prf_data;
+static void *end_data = &__stop___llvm_prf_data;
+static void *start_counters = &__start___llvm_prf_cnts;
+static void *end_counters = &__stop___llvm_prf_cnts;
+static void *start_names = &__start___llvm_prf_names;
+static void *end_names = &__stop___llvm_prf_names;

... the &s here unnecessary. But then - do these really need to
be statics (rather than #define-s)?

I would also guess that at least the names ones could be const.

+static int dump(XEN_GUEST_HANDLE_PARAM(char) buffer, uint32_t *buf_size)
+{
+ struct llvm_profile_header header = {
+#if BITS_PER_LONG == 64
+ .magic = LLVM_PROFILE_MAGIC_64,
+#else
+ .magic = LLVM_PROFILE_MAGIC_32,
+#endif

I think this should just be LLVM_PROFILE_MAGIC, with the #ifdef
moved to the #define-s.

+ .version = LLVM_PROFILE_VERSION,
+ .data_size = (end_data - start_data) / sizeof(struct llvm_profile_data),
+ .counters_size = (end_counters - start_counters) / sizeof(uint64_t),
+ .names_size = end_names - start_names,
+ .counters_delta = (uintptr_t)start_counters,
+ .names_delta = (uintptr_t)start_names,
+ .value_kind_last = LLVM_PROFILE_NUM_KINDS - 1,
+ };
+ unsigned int off = 0;
+
+#define APPEND_TO_BUFFER(src, size) \
+({ \
+ if ( off + size > *buf_size ) \

(size)

+ return -ENOMEM; \
+ copy_to_guest_offset(buffer, off, src, size); \
+ off += size; \

Ideally here too, albeit only the comma operator has lower
precedence, and that would require parenthesizing in the macro
invocation already (which is also why the arguments to
copy_to_guest_offset() explicitly do not need extra parentheses
added).

+})
+ APPEND_TO_BUFFER((char *)&header, sizeof(struct llvm_profile_header));

sizeof(header)

+ APPEND_TO_BUFFER((char *)start_data, end_data - start_data);
+ APPEND_TO_BUFFER((char *)start_counters, end_counters - start_counters);
+ APPEND_TO_BUFFER((char *)start_names, end_names - start_names);

The casts should all be to const char*, and perhaps that would
better be done inside the macro anyway.

+#undef APPEND_TO_BUFFER
+
+ clear_guest_offset(buffer, off, *buf_size - off);
+
+ return 0;
+}
+
+struct cov_sysctl_ops cov_ops = {

const

--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -646,6 +646,12 @@ struct xen_sysctl_scheduler_op {

#define XEN_GCOV_FORMAT_MAGIC 0x58434f56 /* XCOV */

Hmm, shouldn't the private magic #define-s actually be put here
(in which case you'd indeed need to retain both 32- and 64-bit
variants)?

Jan

> --- /dev/null
> +++ b/xen/common/coverage/llvm.c
> +#define LLVM_PROFILE_MAGIC_64 (uint64_t)255 << 56 | (uint64_t)'l' << 48 | \
> + (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | (uint64_t)'o' << 24 | \
> + (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129
> +#define LLVM_PROFILE_MAGIC_32 (uint64_t)255 << 56 | (uint64_t)'l' << 48 | \
> + (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | (uint64_t)'o' << 24 | \
> + (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129

Judging by the file having Xen style, I imply this isn't intended to
very closely match some other original. With that, parentheses
should be added around all the shift operations above.

> +#if __clang_major__ >= 4 || (__clang_major__ == 3 && __clang_minor__ == 9)

Is it certain that there's never going to be a 3.10? Otherwise
>= might be more suitable for the minor version check.

No, there's not going to be a clang 3.10.

> +int __llvm_profile_runtime;

This isn't used anywhere - please add a brief comment explaining
the need for it (the more that its type being plain int is also
suspicious).

This is an internal symbol that must be present because Xen is not
linked against the run-time coverage library. It's described as an int
here:

https://clang.llvm.org/docs/SourceBasedCodeCoverage.html#using-the-profiling-runtime-without-static-initializers

Without this symbol linkage fails.

> +extern struct llvm_profile_data __start___llvm_prf_data;
> +extern struct llvm_profile_data __stop___llvm_prf_data;
> +extern uint64_t __start___llvm_prf_cnts;
> +extern uint64_t __stop___llvm_prf_cnts;
> +extern char __start___llvm_prf_names;
> +extern char __stop___llvm_prf_names;

I guess all of these really want to have added, making ...

> +static void *start_data = &__start___llvm_prf_data;
> +static void *end_data = &__stop___llvm_prf_data;
> +static void *start_counters = &__start___llvm_prf_cnts;
> +static void *end_counters = &__stop___llvm_prf_cnts;
> +static void *start_names = &__start___llvm_prf_names;
> +static void *end_names = &__stop___llvm_prf_names;

... the &s here unnecessary. But then - do these really need to
be statics (rather than #define-s)?

I would also guess that at least the names ones could be const.

Could make them defines indeed.

> + APPEND_TO_BUFFER((char *)start_data, end_data - start_data);
> + APPEND_TO_BUFFER((char *)start_counters, end_counters - start_counters);
> + APPEND_TO_BUFFER((char *)start_names, end_names - start_names);

The casts should all be to const char*, and perhaps that would
better be done inside the macro anyway.

Seems better indeed.

> --- a/xen/include/public/sysctl.h
> +++ b/xen/include/public/sysctl.h
> @@ -646,6 +646,12 @@ struct xen_sysctl_scheduler_op {
>
> #define XEN_GCOV_FORMAT_MAGIC 0x58434f56 /* XCOV */

Hmm, shouldn't the private magic #define-s actually be put here
(in which case you'd indeed need to retain both 32- and 64-bit
variants)?

I don't think so, here XEN_GCOV_FORMAT_MAGIC is a Xen specific gcov
magic number.

OTOH LLVM_PROFILE_MAGIC_{64/32} is an llvm defined magic number,
that's not under our control. Hence I don't think it should be
exported in Xen public headers.

Roger.

Okay.

Jan