私が間違っていたら訂正してください。しかし、Lisp パーサーはかなりうまく機能します。:P 真剣に、これは文字列の括弧で囲まれたリストまたは他の括弧で囲まれた式のように見えます。単純な再帰パーサーで十分です。ニーズに合った解析ツリーとして作成されるデータ構造を発明するだけです。
編集:くそー、私はついにそれを行うことができました... うーん、非常に単純なパーサーでさえ、午後 10 時から午後 12 時の間に正しく作成するのは、それほど簡単な作業ではありません。
/*
* lrparser.c
* LR-parser
* A recursive Lisp-subset parser
* that has a misleading name (it's not an LALR, but a recursive descent one).
*
* Originally written to answer
* http://stackoverflow.com/questions/15371008/string-processing-in-c/
*
* Made in some *really* bored hours by Árpád Goreity (H2CO3)
* on 12-03-2013
*
* Language: C99 (not sure if POSIX)
*/
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include <assert.h>
#include <stdarg.h>
#include <stdbool.h>
// AST node type
enum {
NODE_STRING,
NODE_LIST
};
// Permitted tokens
enum {
TOKEN_INVALID = -1,
TOKEN_LPAREN = 0,
TOKEN_RPAREN,
TOKEN_STRING,
TOKEN_END
};
// Useful for debugging and error reporting
static const char *toknames[] = {
"Left paren",
"Right paren",
"String",
"End"
};
// ...Or simply an AST node...
struct ParseTree {
int type; // string or list
char *string; // if any
struct ParseTree **children;
size_t n_children;
};
// Construct a node structure from a type and any necessary data
static struct ParseTree *node_new(int type, ...)
{
va_list args;
va_start(args, type);
struct ParseTree *node = malloc(sizeof(*node));
assert(node != NULL);
node->type = type;
if (type == NODE_STRING) {
/* If the node is a string, fill it
* (ownership transfer: the argument will be
* free()'d by the node_free() function)
*/
node->string = va_arg(args, char *);
}
node->children = NULL;
node->n_children = 0;
va_end(args);
return node;
}
void node_free(struct ParseTree *tree)
{
switch (tree->type) {
case NODE_STRING:
free(tree->string);
break;
case NODE_LIST:
for (int i = 0; i < tree->n_children; i++) {
node_free(tree->children[i]);
}
free(tree->children);
break;
default:
fprintf(stderr, "Warning: unknown node type %d\n", tree->type);
break;
}
free(tree);
}
// Sorry, the usual logarithmic storage expansion is omitted for clarity
void node_add(struct ParseTree *parent, struct ParseTree *child)
{
assert(parent != NULL);
assert(child != NULL);
parent->n_children++;
parent->children = realloc(parent->children, sizeof(parent->children[0]) * parent->n_children);
// Lazy error checking: assert() instead of compare to NULL
assert(parent->children != NULL);
parent->children[parent->n_children - 1] = child;
}
// Just in order to break thread safety
static const char *s = NULL; // the string to be parsed
static char *curstr = NULL; // the contents of the string value of the current token
static int curtok; // the current token
// The tokenizer
static int lex()
{
// Whitespace doesn't matter
while (isspace(s[0])) {
s++;
}
// end of string
if (s[0] == 0) {
return TOKEN_END;
}
// The followin four are obvious
if (s[0] == '(') {
s++;
return curtok = TOKEN_LPAREN;
}
if (s[0] == ')') {
s++;
return curtok = TOKEN_RPAREN;
}
if (s[0] == '"') {
const char *begin = s;
while (*++s != '"')
;
size_t sz = s - begin - 2 + 1;
curstr = malloc(sz + 1);
memcpy(curstr, begin + 1, sz);
curstr[sz] = 0;
// skip trailing quotation mark (")
s++;
return curtok = TOKEN_STRING;
}
return curtok = TOKEN_INVALID;
}
void expect(int tok)
{
if (curtok != tok) {
fprintf(stderr, "Error: expected token %s, got %s\n", toknames[tok], toknames[curtok]);
abort();
}
lex();
}
// a. k. a. "parse()"
// Simple recursive (one-level...) descent (root == list) approach
static struct ParseTree *recurse_and_descend()
{
expect(TOKEN_LPAREN);
struct ParseTree *node = node_new(NODE_LIST);
struct ParseTree *child;
while (curtok != TOKEN_RPAREN) {
if (curtok == TOKEN_LPAREN) {
child = recurse_and_descend();
} else if (curtok == TOKEN_STRING) {
child = node_new(NODE_STRING, curstr);
lex();
} else {
fprintf(stderr, "Unexpected token '%s'\n", toknames[curtok]);
// lazy programmer's safety system, let the kernel do the dirty work
abort();
}
node_add(node, child);
}
expect(TOKEN_RPAREN);
return node;
}
static struct ParseTree *parse(const char *str)
{
s = str; // poor man's initialization
lex(); // The first breath of fresh token makes the baby's heart beat
return recurse_and_descend(); // Let's do the Harlem shake!
}
// petite helper function
static void dump_indent(int indent)
{
for (int i = 0; i < indent; i++) {
printf("\t");
}
}
// Because 0x7f502a00 is not very meaningful for the human eye
static void dump_tree(struct ParseTree *tree, int indent)
{
dump_indent(indent);
switch (tree->type) {
case NODE_STRING:
printf("<String \"%s\">\n", tree->string);
break;
case NODE_LIST:
printf("<List>\n");
for (int i = 0; i < tree->n_children; i++) {
dump_tree(tree->children[i], indent + 1);
}
break;
default:
printf("Unknown node\n");
break;
}
}
int main(int argc, char *argv[])
{
struct ParseTree *tree = parse(argv[1]);
dump_tree(tree, 0);
node_free(tree);
return 0;
}
使用法:
h2co3-macbook:~ h2co3$ ./lrparser "(\"do you like me?\" (\"yes\" (\"we're friends\")) (\"no\" (\"i hate you\" \"me too\")) )"
<List>
<String "do you like me?">
<List>
<String "yes">
<List>
<String "we're friends">
<List>
<String "no">
<List>
<String "i hate you">
<String "me too">