私はlexを使って同じようなことをしました。もちろん、それは一日おき程度に実行されるものだったので、YMMV(結果は環境によって異なります)。リモートWindows共有上の数百メガバイトのファイルでも非常に高速で、処理には数秒しかかかりません。手早くCプログラムを書くことにどの程度慣れているかはわかりませんが、
私の経験では、これが大規模な正規表現の問題に対する最も速くて簡単な解決策でした。
関係者を保護するため、一部を伏せて編集してあります:
/**************************************************
start of definitions section

Everything between %{ and %} is copied verbatim into the generated scanner,
so the rule actions and main() further down can use these headers and globals.
***************************************************/
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <getopt.h>
#include <errno.h>
/* input name as given on the command line; filled in by main() */
char inputName[256];
// static insert variables
/* incremented by the newline rule each time a record goes to the unknown file;
   the rule referenced it without any declaration, which did not compile */
static unsigned long unknownCount = 0;
//other variables
/* not referenced by the rules or main() in this file */
char tempString[256];
/* text of the record currently being scanned; the rules append each match here */
char myHolder[256];
/* copy of inputName made by main(); used in its final status message */
char fileName[256];
/* names of the two output files, built by main() */
char unknownFileName[256];
char stuffFileName[256];
/* not referenced by the rules or main() in this file */
char buffer[5];
/* we are using pointers to hold the file locations, and allow us to dynamically open and close new files */
/* also, it allows us to obfuscate which file we are writing to, otherwise this couldn't be done */
/* yyTemp is the destination for the current record: it points at yyUnknown by
   default and is switched to yyStuff when the key pattern matches */
FILE *yyTemp;
FILE *yyUnknown;
FILE *yyStuff;
// flags for command line options
static int help_flag = 0;
%}
/* 8bit:     generate a scanner that handles 8-bit input characters
   nounput:  do not generate the unused unput() routine
   nomain:   main() is supplied in the user code section
   noyywrap: treat end of input as final instead of calling yywrap()
   warn:     let flex report suspicious constructs */
%option 8bit
%option nounput nomain noyywrap
%option warn
%%
%{
    /************************************************
    start of rules section

    The input is consumed one record (line) at a time.  Each match is
    appended to myHolder; when the line ends, the collected text is written
    to whichever file yyTemp points at.  A record goes to the unknown file
    unless the key pattern below is seen somewhere on the line.

    This prologue is copied to the top of yylex(), so the declarations and
    macros here are private to the scanner.  The section comment is kept
    inside %{ %} because flex expects a pattern at the start of a line in
    the rules section.
    *************************************************/

    /* Non-zero once the current record has outgrown myHolder. */
    static int recordTruncated = 0;

    /*
     * Append the current match to myHolder without writing past the end of
     * the array.  Once a piece does not fit, the rest of the record is
     * dropped so the stored text stays a clean prefix of the line.
     */
#define HOLD_APPEND() \
    do { \
        size_t used_ = strlen(myHolder); \
        if (!recordTruncated && used_ + (size_t)yyleng < sizeof myHolder) \
            strcat(myHolder, yytext); \
        else \
            recordTruncated = 1; \
    } while (0)

    /*
     * Write the collected record followed by `terminator` to the selected
     * file, then reset the buffer and point yyTemp back at the default
     * (unknown) file.  The terminator is written separately so a truncated
     * record still ends its line.
     */
#define FLUSH_RECORD(terminator) \
    do { \
        if (recordTruncated) { \
            fprintf(stderr, "warning: record longer than %lu bytes was truncated\n", \
                    (unsigned long)(sizeof myHolder - 1)); \
            recordTruncated = 0; \
        } \
        fputs(myHolder, yyTemp); \
        fputs((terminator), yyTemp); \
        myHolder[0] = '\0'; \
        yyTemp = yyUnknown; \
    } while (0)
%}
(\"A\",\"(1330|1005|1410|1170)\") {
        /* stuff files: a record containing this key is routed to yyStuff */
        HOLD_APPEND();
        yyTemp = yyStuff;
    }
. { HOLD_APPEND(); }
\n {
        /* unknownCount is declared outside this section */
        if (yyTemp == yyUnknown)
            unknownCount += 1;
        /* print to the file we are pointing at, whatever it is */
        FLUSH_RECORD(yytext);
    }
<<EOF>> {
        /* Flush a final record that has no trailing newline.  yytext holds
           no matched text at end of file, so it is not appended. */
        FLUSH_RECORD("");
        yyterminate();
    }
%%
/****************************************************
start of code section
*****************************************************/

/*
 * Report a failed fopen() on stderr and terminate the program.
 *
 * prog: program name used as the message prefix (argv[0])
 * path: the file that could not be opened
 * err:  errno value captured right after the failed call
 *
 * The exit status is err, or EXIT_FAILURE if err is 0, so a failure never
 * reports success.
 */
static void fail_open(const char *prog, const char *path, int err)
{
    fprintf(stderr, "%s: Couldn't open file %s; %s\n", prog, path, strerror(err));
    exit(err != 0 ? err : EXIT_FAILURE);
}

/*
 * Build an output file name in dst: `name` with its final extension (if any)
 * removed, followed by `suffix`.
 *
 * Only a '.' inside the last path component counts as an extension, and a
 * leading '.' (hidden file) is kept.  Terminates the program if the result
 * does not fit in dst_size bytes.
 */
static void make_output_name(char *dst, size_t dst_size, const char *name, const char *suffix)
{
    const char *leaf = name;
    const char *slash = strrchr(name, '/');
    const char *backslash = strrchr(name, '\\');
    const char *dot;
    size_t base_len;
    int written;

    /* find the start of the last path component (either separator style) */
    if (slash != NULL && slash + 1 > leaf)
        leaf = slash + 1;
    if (backslash != NULL && backslash + 1 > leaf)
        leaf = backslash + 1;

    dot = strrchr(leaf, '.');
    base_len = (dot != NULL && dot != leaf) ? (size_t)(dot - name) : strlen(name);

    written = snprintf(dst, dst_size, "%.*s%s", (int)base_len, name, suffix);
    if (written < 0 || (size_t)written >= dst_size) {
        fprintf(stderr, "output file name too long for input %s\n", name);
        exit(EXIT_FAILURE);
    }
}

/****************************************************
The main method drives the program. It gets the filename from the
command line, and opens the initial files to write to. Then it calls the lexer.
After the lexer returns, the main method closes all of the open files and
prints out to the command line to let the user know it is finished.

Returns 0 on success (and after printing help); exits with a non-zero
status if a file cannot be opened, a file name is too long, or an output
file cannot be closed cleanly.
****************************************************/
int main(int argc, char **argv)
{
    static struct option long_options[] = {
        /* These options set a flag. */
        {"help", no_argument, &help_flag, 1},
        /* These options don't set a flag. We distinguish them by their indices. */
        {0, 0, 0, 0}
    };
    FILE *input = NULL;      /* stays NULL when reading from stdin */
    const char *outputBase;  /* name the output file names are derived from */
    int status = 0;
    int c;

    // the gnu getopt library is used to parse the command line for flags
    // afterwards, the final option is assumed to be the input file
    for (;;) {
        /* getopt_long stores the option index here. */
        int option_index = 0;

        c = getopt_long(argc, argv, "h", long_options, &option_index);
        /* Detect the end of the options. */
        if (c == -1)
            break;

        switch (c) {
        case 0:
            /* If this option set a flag, do nothing else now. */
            if (long_options[option_index].flag != 0)
                break;
            printf("option %s", long_options[option_index].name);
            if (optarg)
                printf(" with arg %s", optarg);
            printf("\n");
            break;
        case 'h':
            help_flag = 1;
            break;
        case '?':
            /* getopt_long already printed an error message. */
            break;
        default:
            abort();
        }
    }

    if (help_flag == 1) {
        printf("proper syntax is: yourProgram.exe [OPTIONS]... INFILE\n");
        printf("splits csv file into multiple files\n");
        printf("Option list: \n");
        printf("--help print help to screen\n");
        printf("\n");
        return 0;
    }

    //get the filename off the command line and redirect it to input
    //if there is no filename then use stdin
    if (optind < argc) {
        int written = snprintf(inputName, sizeof inputName, "%s", argv[optind]);

        if (written < 0 || (size_t)written >= sizeof inputName) {
            fprintf(stderr, "%s: input file name too long: %s\n", argv[0], argv[optind]);
            return EXIT_FAILURE;
        }
        input = fopen(argv[optind], "r");
        if (!input)
            fail_open(argv[0], argv[optind], errno);
        yyin = input;
        outputBase = inputName;
    } else {
        printf("no input file set, using stdin. Press ctrl-c to quit");
        yyin = stdin;
        /* the backspaces only shape the final status message; they must not
           end up in a file name, so the outputs are named after "stdin" */
        strcpy(inputName, "\b\b\b\b\bagainst stdin");
        outputBase = "stdin";
    }

    //set up initial file names
    strcpy(fileName, inputName); /* both arrays have the same size */
    make_output_name(unknownFileName, sizeof unknownFileName, outputBase, "_UNKNOWN_1.csv");
    make_output_name(stuffFileName, sizeof stuffFileName, outputBase, "_STUFF_1.csv");

    //open files for writing
    yyout = stdout;
    yyUnknown = fopen(unknownFileName, "w");
    if (!yyUnknown)
        fail_open(argv[0], unknownFileName, errno);
    yyStuff = fopen(stuffFileName, "w");
    if (!yyStuff)
        fail_open(argv[0], stuffFileName, errno);
    /* records go to the unknown file until a rule selects another one */
    yyTemp = yyUnknown;

    yylex();

    //close open files; fclose flushes, so a failure here means lost output
    if (fclose(yyUnknown) != 0) {
        fprintf(stderr, "%s: error writing %s; %s\n", argv[0], unknownFileName, strerror(errno));
        status = EXIT_FAILURE;
    }
    if (fclose(yyStuff) != 0) {
        fprintf(stderr, "%s: error writing %s; %s\n", argv[0], stuffFileName, strerror(errno));
        status = EXIT_FAILURE;
    }
    if (input != NULL)
        fclose(input);

    printf("Lexer finished running %s", fileName);
    return status;
}
このflexプログラムをビルドするには、flexをインストールし、次のmakefileを使用します(パスは環境に合わせて調整してください)。
# Builds the scanner: flex turns every *.l file into a C file, which is then
# compiled straight into the executable.  Recipe lines must start with a tab.
TARGET = project.exe
TESTBUILD = project
LEX = flex
# -Cf asks flex for full (uncompressed) scanner tables
LFLAGS = -Cf
# cross compiler used for the default target (the name suggests MinGW; adjust to your toolchain)
CC = i586-mingw32msvc-gcc
CFLAGS = -O -Wall
INSTALLDIR = /mnt/J/Systems/executables

# none of these targets names a file that the rule creates
.PHONY: default all clean install uninstall cleanall linux

default: $(TARGET)

all: default install

# Despite the name, OBJECTS lists the C sources generated from the *.l files.
OBJECTS = $(patsubst %.l, %.c, $(wildcard *.l))

%.c: %.l
	$(LEX) $(LFLAGS) -o $@ $<

.PRECIOUS: $(TARGET) $(OBJECTS)

$(TARGET): $(OBJECTS)
	$(CC) $(OBJECTS) $(CFLAGS) -o $@

# native test build with the host gcc; produces $(TESTBUILD)
linux: $(OBJECTS)
	gcc $(OBJECTS) $(CFLAGS) -lm -g -o $(TESTBUILD)

cleanall: clean uninstall

# removes every .c file in the directory, on the assumption that all of them are flex output
clean:
	-rm -f *.c
	-rm -f $(TARGET)
	-rm -f $(TESTBUILD)

uninstall:
	-rm -f $(INSTALLDIR)/$(TARGET)

# depends on the executable so it is never copied before it has been built
install: $(TARGET)
	cp -f $(TARGET) $(INSTALLDIR)