Attached is what I got thus far.
What I'm struggling with is proper integration in build system. What
is in there is just wild guesses from my side, both on autoconf and
cmake variants. It would be great if someone with proper knowledge of
the buildsystems could have a look.
I don't know if I qualify for that, but it seems to be working for me 
Also I'm not sure how to properly
handle compilation on msvc - clearly "-std=c11 -Wstrict-prototypes" is
not the options I should use in that case. (also the dup2 use, will
that work on windows?).
I think we should just punt on compilation with msvc for now. When I
tried, it wouldn't even compile the headers in include/llvm-c/. I
think we should fix this, but your patch shouldn't be blocked on it.
We should try to avoid any inherently msvc-hostile code, though. Like
the dup2 stuff - can we change llvm-c to allow for dumping a module to
stdout instead of stderr? Redirecting the streams is kind of hacky
anyway.
More comments inline:
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -40,6 +40,8 @@ add_llvm_tool_subdirectory(llvm-mcmarkup)
add_llvm_tool_subdirectory(llvm-symbolizer)
+add_llvm_tool_subdirectory(llvm-c-test)
I think we should just wrap this in an "if (NOT MSVC)" clause for now.
This way we can get your patch landed, and then work on getting it and
llvm-c to build with msvc if that seems feasible. Right now for
example, msvc chokes on all the "static inline" in the header files.
--- /dev/null
+++ b/tools/llvm-c-test/calc.c
@@ -0,0 +1,121 @@
+/*===-- calc.c - tool for testing libLLVM and llvm-c API ------------------===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This file implements the --calc command in llvm-c-test. --calc reads lines *|
+|* from stdin, parses them as a name and an expression in reverse polish *|
+|* notation and prints a module with a function with the expression. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#include "llvm-c-test.h"
+#include "llvm-c/Core.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef LLVMValueRef (*binop_func_t)(LLVMBuilderRef, LLVMValueRef LHS,
+ LLVMValueRef RHS, const char *Name);
+
+static const LLVMOpcode binop = {['+'] = LLVMAdd,
Nice! But I suspect MSVC will have a problem with this 
+ ['-'] = LLVMSub,
+ ['*'] = LLVMMul,
+ ['/'] = LLVMSDiv,
+ ['&'] = LLVMAnd,
+ ['|'] = LLVMOr,
+ ['^'] = LLVMXor};
+
+static void handle_line(char **tokens, int ntokens) {
+ char *name = tokens[0];
+
+ LLVMModuleRef M = LLVMModuleCreateWithName(name);
+
+ LLVMTypeRef I64ty = LLVMInt64Type();
+
+ LLVMTypeRef Fty = LLVMFunctionType(
+ I64ty, (LLVMTypeRef[1]) { LLVMPointerType(I64ty, 0) }, 1, 0);
+
+ LLVMValueRef F = LLVMAddFunction(M, name, Fty);
+ LLVMValueRef param;
+ LLVMBasicBlockRef BB = LLVMAppendBasicBlock(F, "entry");
+ LLVMBuilderRef builder = LLVMCreateBuilder();
+ LLVMPositionBuilderAtEnd(builder, BB);
+
+ LLVMGetParams(F, ¶m);
+
+ LLVMSetValueName(param, "in");
+
+ char *end;
+
+ LLVMValueRef stack[32];
+ int depth = 0;
+
+ for (int i = 1; i < ntokens; i++) {
Ultra nit: llvm style likes preincrement better. Not sure how much
this makes sense in C context, but may be worth sticking to for
consistency.
+ char tok = tokens[i][0];
+ switch (tok) {
+ case '+':
+ case '-':
+ case '*':
+ case '/':
+ case '&':
+ case '|':
+ case '^':
+ if (depth < 2) {
+ printf("stack underflow\n");
+ return;
+ }
+ stack[depth - 2] = LLVMBuildBinOp(builder, binop[tok], stack[depth - 1], stack[depth - 2], "");
+ depth--;
+ break;
+ case '@':
+ if (depth < 1) {
+ printf("stack underflow\n");
+ return;
+ }
+ LLVMValueRef off = LLVMBuildGEP(builder, param, &stack[depth - 1], 1, "");
+ stack[depth - 1] = LLVMBuildLoad(builder, off, "");
+ break;
+ default: {
+ long val = strtol(tokens[i], &end, 0);
+ if (end[0] != '\0') {
+ printf("error parsing\n");
+ return;
+ }
+ if (depth >= 32) {
Maybe break out 32 to a #define ?
+++ b/tools/llvm-c-test/helpers.c
@@ -0,0 +1,50 @@
+/*===-- helpers.c - tool for testing libLLVM and llvm-c API ---------------===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* Helper functions *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#include "llvm-c-test.h"
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+void tokenize_stdin(void (*cb)(char **tokens, int ntokens)) {
+ char line[1024];
+ char *tokbuf[512];
+
+ while (fgets(line, sizeof(line), stdin)) {
+ int c = 0;
+
+ if (line[0] == ';' || line[0] == '\n')
+ continue;
+
+ while (c < 512) {
Use sizeof(tokbuf) instead of 512?
+ tokbuf[c] = strtok(c ? NULL : line, " \n");
+ if (!tokbuf[c])
+ break;
+ c++;
+ }
+ if (c)
+ cb(tokbuf, c);
+ }
+}
+
+int redirect_stderr_to_stdout() {
Should this be redirect_stderr_to_stdout(void) ?
+ int old;
+ old = dup(STDERR_FILENO);
+ dup2(STDOUT_FILENO, STDERR_FILENO);
Yeah, this won't work well on Windows.
If the purpose of these redirect_ functions is just to be able to dump
modules to stdout rather than stderr, maybe it's LLVMDumpModule that
should be fixed, or needs an alternative function?
diff --git a/tools/llvm-c-test/include-all.c b/tools/llvm-c-test/include-all.c
new file mode 100644
index 0000000..a5f5742
--- /dev/null
+++ b/tools/llvm-c-test/include-all.c
@@ -0,0 +1,31 @@
+/*===-- include-all.c - tool for testing libLLVM and llvm-c API -----------===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This file doesn't have any actual code. It just make sure that all *|
+|* the llvm-c include files are good and doesn't generate any warnings *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#include "llvm-c/Analysis.h"
+#include "llvm-c/BitReader.h"
+#include "llvm-c/BitWriter.h"
+#include "llvm-c/Core.h"
+#include "llvm-c/Disassembler.h"
+#include "llvm-c/ExecutionEngine.h"
+#include "llvm-c/Initialization.h"
+#include "llvm-c/LinkTimeOptimizer.h"
+#include "llvm-c/Linker.h"
+#include "llvm-c/Object.h"
+#include "llvm-c/Target.h"
+#include "llvm-c/TargetMachine.h"
+#include "llvm-c/Transforms/IPO.h"
+#include "llvm-c/Transforms/PassManagerBuilder.h"
+#include "llvm-c/Transforms/Scalar.h"
+#include "llvm-c/Transforms/Vectorize.h"
+#include "llvm-c/lto.h"
I wonder if the build system could generate this list of files for us
somehow. Could probably just be a FIXME for now, though.
+++ b/tools/llvm-c-test/module.c
@@ -0,0 +1,131 @@
+/*===-- module.c - tool for testing libLLVM and llvm-c API ----------------===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This file implements the --module-dump, --module-list-functions and *|
+|* --module-list-globals commands in llvm-c-test. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#include "llvm-c-test.h"
+#include "llvm-c/BitReader.h"
+#include "llvm-c/Core.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
Is unistd.h actually needed for this file?
+
+static LLVMModuleRef load_module(void) {
+ LLVMMemoryBufferRef MB;
+ LLVMModuleRef M;
+ char *msg = NULL;
+
+ if (LLVMCreateMemoryBufferWithSTDIN(&MB, &msg)) {
+ fprintf(stderr, "Error reading file: %s\n", msg);
+ exit(1);
+ }
+
+ if (LLVMParseBitcode(MB, &M, &msg)) {
+ fprintf(stderr, "Error parsing bitcode: %s\n", msg);
+ exit(1);
+ }
+
+ return M;
+}
+
+int module_dump(void) {
+ LLVMModuleRef M = load_module();
+
+ int tmpfd = redirect_stderr_to_stdout();
+ LLVMDumpModule(M);
+ unredirect_stderr_to_stdout(tmpfd);
+
+ LLVMDisposeModule(M);
+
+ return 0;
+}
+
+int module_list_functions(void) {
+ LLVMModuleRef M = load_module();
+ LLVMValueRef f;
+
+ f = LLVMGetFirstFunction(M);
+ while (f) {
+ if (LLVMIsDeclaration(f)) {
+ printf("FunctionDeclaration: %s\n", LLVMGetValueName(f));
+ } else {
+ unsigned nisn = 0;
+ unsigned nbb = 0;
+
+ printf("FunctionDefinition: %s [#bb=%u]\n", LLVMGetValueName(f),
+ LLVMCountBasicBlocks(f));
+
+ for (LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(f); bb;
+ bb = LLVMGetNextBasicBlock(bb)) {
+ nbb++;
+ for (LLVMValueRef isn = LLVMGetFirstInstruction(bb); isn;
+ isn = LLVMGetNextInstruction(isn)) {
+ nisn++;
+ if (LLVMIsACallInst(isn)) {
+ LLVMValueRef callee =
+ LLVMGetOperand(isn, LLVMGetNumOperands(isn) - 1);
+ printf(" calls: %s\n", LLVMGetValueName(callee));
+ }
+ }
+ }
+ printf(" #isn: %u\n", nisn);
+ printf(" #bb: %u\n\n", nbb);
+ }
+ f = LLVMGetNextFunction(f);
+ }
+
+ LLVMDisposeModule(M);
+
+ return 0;
+}
+
+static const char *TypeKindNames =
+ {[LLVMVoidTypeKind] = "VoidTypeKind",
+ [LLVMHalfTypeKind] = "HalfTypeKind",
+ [LLVMFloatTypeKind] = "FloatTypeKind",
+ [LLVMDoubleTypeKind] = "DoubleTypeKind",
+ [LLVMX86_FP80TypeKind] = "X86_FP80TypeKind",
+ [LLVMFP128TypeKind] = "FP128TypeKind",
+ [LLVMPPC_FP128TypeKind] = "PPC_FP128TypeKind",
+ [LLVMLabelTypeKind] = "LabelTypeKind",
+ [LLVMIntegerTypeKind] = "IntegerTypeKind",
+ [LLVMFunctionTypeKind] = "FunctionTypeKind",
+ [LLVMStructTypeKind] = "StructTypeKind",
+ [LLVMArrayTypeKind] = "ArrayTypeKind",
+ [LLVMPointerTypeKind] = "PointerTypeKind",
+ [LLVMVectorTypeKind] = "VectorTypeKind",
+ [LLVMMetadataTypeKind] = "MetadataTypeKind",
+ [LLVMX86_MMXTypeKind] = "X86_MMXTypeKind", };
Again, I don't think MSVC will be happy. But with a switch-case
thingy, LLVM will generate the equivalent (some day it might even be
better) code 
--- /dev/null
+++ b/test/Bindings/llvm-c/disassemble.test
@@ -0,0 +1,29 @@
+; RUN: llvm-as < %s | llvm-c-test --module-list-functions | FileCheck %s
I guess this test will only work for builds that include both the
x86_64 and arm targets, so the lit config file should probably take
that into account.
+
+
+arm-linux-android 44 26 1f e5 0c 10 4b e2 02 20 81 e0
How does this work? I don't see how llvm-as would parse this as valid
LLVM IR, and this test fails locally for me.
+++ b/test/Bindings/llvm-c/globals.ll
@@ -0,0 +1,7 @@
+; RUN: llvm-as < %s | llvm-shlib-test --list-module-globals | FileCheck %s
I guess this should be llvm-c-test --module-list-globals
--- /dev/null
+++ b/tools/llvm-c-test/llvm-c-test.h
@@ -0,0 +1,39 @@
+/*===-- llvm-c-test.h - tool for testing libLLVM and llvm-c API -----------===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
All the other files had a nice comment here 
+|* *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+#ifndef _LLVM_C_TEST_H
Do we normally use leading _ for include gurads in LLVM hreaders?
+#define _LLVM_C_TEST_H
+
+/* helpers.c */
These comments are pretty non-LLVM style. Not a big deal, but still.