/*
 * Annotation-System/hmmlconv/hmmlconv.c
 *
 * 573 lines, 14 KiB, C
 * (Repository web-view header: Raw / Permalink / Normal View / History)
 */

#if 0
cc "$0" -g -std=c99 -D_POSIX_SOURCE -o "${0%.c}"
exit
#endif
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h> // access
#include <string.h> // strerror
#include <fcntl.h> // open
#include <sys/types.h> // open, mkdir
#include <sys/stat.h> // open, mkdir
// (blame timestamp from the repository web view) 2017-05-01 09:44:22 +00:00
// Prints a printf-style message to stderr, prefixed with " l.<LineNumber>: ".
// The macro expands to a use of a variable named LineNumber, so one must be in
// scope at every call site.
// NOTE: `##__VA_ARGS__` (comma elision when no extra arguments are passed) is a
// GNU extension rather than standard C99.
#define LError(Format, ...) do { fprintf(stderr, " l.%d: " Format, LineNumber, ##__VA_ARGS__); } while(0)
// One parsed bullet from the "## Resources" section of an input file.
// The strings are not owned by the struct: ProcessFile points them into the
// file buffer it is working on, and that buffer is freed when ProcessFile returns.
typedef struct
{
char *Site; // text preceding the link's '[' (may be an empty string)
char *Page; // link text, i.e. what sits between '[' and "]("
char *URL; // link target, i.e. what sits between "](" and ')'
} Resource;
// Resource table for the file currently being converted. ProcessFile resets
// ResCount to 0 and refills the table; LookupResource searches it.
static Resource Resources[64];
static int ResCount;
// Strips any leading directories from a path, e.g.: /path/to/thing -> thing
// Returns a pointer into Path itself (nothing is copied). A path without any
// '/' is returned unchanged; a path ending in '/' yields an empty string.
char *BaseName(char *Path)
{
    char *LastSlash = strrchr(Path, '/');
    return LastSlash ? LastSlash + 1 : Path;
}
// Consumes the remainder of the current stdin line. Stops at end-of-file as
// well as at '\n', so a truncated or closed stdin can never make us spin.
static void DiscardInputLine(void)
{
    int C;
    do
    {
        C = getchar();
    } while(C != '\n' && C != EOF);
}

// Asks the user on stdout/stdin whether the existing file Name may be overwritten.
//   Name   - file name shown in the prompt
//   Always - set to 1 when the user answers 'a'/'A' (overwrite everything)
// Returns 1 to overwrite, 0 to keep the file. Answering 'x'/'X', or hitting
// end-of-file at the prompt, terminates the program with exit status 0.
// Any other answer is reported as unknown and the prompt is repeated.
int PromptOverwrite(char *Name, int* Always)
{
    for(;;)
    {
        printf("The file '%s' exists. Overwrite? [Yes|No|Always|eXit]\n> ", Name);
        fflush(stdout);

        int C = getchar();
        switch(C)
        {
            case 'a':
            case 'A':
                *Always = 1;
                /* fallthrough: "always" also means "yes" for this file */
            case 'y':
            case 'Y':
                DiscardInputLine();
                return 1;

            case 'n':
            case 'N':
                DiscardInputLine();
                return 0;

            case 'x':
            case 'X':
            case EOF:
                exit(0);

            default:
                break;
        }

        printf("Unknown option '%c'\n", C);
        // When the answer was a bare newline the line is already consumed.
        if(C != '\n')
        {
            DiscardInputLine();
        }
    }
}
// Reads the entire contents of File into a freshly malloc'd, NUL-terminated
// buffer. The caller owns the buffer and must free() it.
// The file must be seekable. Any failure (seek, allocation, read error) is
// reported with perror and terminates the program with exit status 1.
char *ReadWholeFile(FILE *File)
{
    long Size = -1;
    if(fseek(File, 0, SEEK_END) == 0)
    {
        Size = ftell(File);
    }
    // ftell reports failure as -1; never let that turn into a huge size_t.
    if(Size < 0 || fseek(File, 0, SEEK_SET) != 0)
    {
        perror("fseek");
        exit(1);
    }

    char *Buffer = malloc((size_t)Size + 1);
    if(!Buffer)
    {
        perror("malloc");
        exit(1);
    }

    size_t BytesRead = fread(Buffer, 1, (size_t)Size, File);
    if(BytesRead < (size_t)Size && ferror(File))
    {
        perror("fread");
        exit(1);
    }
    // Terminate after what was actually read: a short read (e.g. text-mode
    // newline translation) must not expose uninitialised bytes.
    Buffer[BytesRead] = 0;
    return Buffer;
}
// Advances *Ptr past leading whitespace, where "whitespace" means any byte in
// the range 1..' ' (space, tab, CR, LF and the other ASCII control characters).
// Stops at the string terminator.
void SkipWhitespace(char **Ptr)
{
    // Compare as unsigned char: where plain char is signed, bytes >= 0x80
    // (e.g. UTF-8 lead/continuation bytes) are negative and would otherwise
    // be mistaken for whitespace and skipped.
    while(**Ptr && (unsigned char)**Ptr <= ' ')
    {
        ++*Ptr;
    }
}
// Unquotes and unescapes a double-quoted string, rewriting it in place.
//   In - points at the opening '"' (the quote is optional) of a string of the
//        form "text"; inside it, a backslash makes the next character literal.
// Returns a pointer to the NUL-terminated, unescaped text (inside In's buffer),
// or NULL when the closing quote is missing, when anything follows the closing
// quote, or when the string ends right after a backslash.
// On failure the buffer may already have been partially rewritten.
char *InPlaceUnescape(char *In)
{
    if(*In == '"') ++In;
    char *Result = In;
    char *Out = In;
    while(*In && *In != '"')
    {
        if(*In == '\\')
        {
            ++In;
            // A backslash as the very last character has nothing to escape;
            // copying the terminator and carrying on would read past the end.
            if(!*In)
            {
                return NULL;
            }
        }
        *Out++ = *In++;
    }
    // The closing quote must exist and must be the last character.
    if(*In != '"' || In[1] != '\0')
    {
        return NULL;
    }
    *Out = 0;
    return Result;
}
// Returns 1 when C is an ASCII digit or an ASCII letter, 0 otherwise.
int IsAlNum(char C)
{
    if(C >= '0' && C <= '9') return 1;
    if(C >= 'a' && C <= 'z') return 1;
    return C >= 'A' && C <= 'Z';
}
Resource *LookupResource(char *Tag, char *Line)
{
Resource *Res = NULL;
for(int i = 0; i < ResCount; ++i)
{
if(strcmp(Tag, Resources[i].Site) == 0 || strcmp(Tag, Resources[i].Page) == 0)
{
Res = Resources + i;
if(strstr(Line, Res->Page))
{
break;
}
}
}
return Res;
}
// Error codes returned by ValidateTimecode (0 means the timecode is valid).
enum
{
    TC_NAN = 1,         // empty, or contains something other than digits and ':'
    TC_COLONS = 2,      // more than two ':' separators
    TC_OUT_OF_RANGE = 3 // seconds or minutes above 59, a trailing ':', or an absurdly large field
};

// Checks that Timecode has the form [[H:]M:]S made of decimal digits.
// Returns 0 when valid, otherwise one of the TC_* codes above.
int
ValidateTimecode(char *Timecode)
{
    int HMS[3] = { 0, 0, 0 }; // 0 == Seconds; 1 == Minutes; 2 == Hours
    int Colons = 0;
    char Last = '\0';

    // An empty string is not a number. (It also must not reach the
    // trailing-colon check, which looks at the last character.)
    if(!*Timecode) { return TC_NAN; }

    for(; *Timecode; ++Timecode)
    {
        Last = *Timecode;
        if(Last == ':')
        {
            if(++Colons > 2) { return TC_COLONS; }
            // What has been read so far moves up one unit
            // (seconds -> minutes -> hours) and a new low field starts.
            HMS[2] = HMS[1];
            HMS[1] = HMS[0];
            HMS[0] = 0;
        }
        else if(Last >= '0' && Last <= '9')
        {
            // Refuse to accumulate further once the next step could overflow int.
            if(HMS[0] > 100000000) { return TC_OUT_OF_RANGE; }
            HMS[0] = HMS[0] * 10 + (Last - '0');
        }
        else
        {
            return TC_NAN;
        }
    }

    if(HMS[0] > 59 || HMS[1] > 59 || Last == ':') { return TC_OUT_OF_RANGE; }
    return 0;
}
void ProcessAnnotation(char *Line, int LineNumber, FILE *OutFile)
{
SkipWhitespace(&Line);
2017-05-01 09:44:22 +00:00
if(*Line != '"') { LError("Syntax error, line must begin with a \": %.*s...\n", 8, Line); exit(1); }
fputc('[', OutFile);
2017-05-01 08:26:56 +00:00
char Timecode[9];
char *Ptr = Timecode;
while(*++Line != '"' && *Line != '\n')
{
2017-05-01 08:26:56 +00:00
*Ptr++ = *Line;
}
2017-05-01 08:26:56 +00:00
*Ptr = '\0';
switch(ValidateTimecode(Timecode))
{
case 0: break;
2017-05-01 09:44:22 +00:00
case TC_NAN: LError("Invalid timecode, not a number: %s\n", Timecode); exit(1);
case TC_COLONS: LError("Invalid timecode, too many colons: %s\n", Timecode); exit(1);
case TC_OUT_OF_RANGE: LError("Invalid timecode, not 0-59: %s\n", Timecode); exit(1);
2017-05-01 08:26:56 +00:00
}
fputs(Timecode, OutFile);
fputc(']', OutFile);
2017-05-01 09:44:22 +00:00
if(*++Line != ':') { LError("Syntax error, missing : before: %.*s...\n", 8, Line); exit(1); }
if(*++Line != ' ') { LError("Syntax error, missing space before: %.*s...\n", 8, Line); exit(1); }
if(*++Line != '"') { LError("Syntax error, missing \" before: %.*s...\n", 8, Line); exit(1); }
2017-05-01 08:26:56 +00:00
char *LinePtr = Line;
2017-05-01 09:44:22 +00:00
if(!(Line = InPlaceUnescape(Line)))
{
LError("Syntax error, invalid quoting in %.*s\n", Line-LinePtr, LinePtr); exit(1);
2017-05-01 09:44:22 +00:00
}
// convert author
if(Line[0] == '@')
{
char *P = strchr(Line, ' ');
2017-05-01 09:44:22 +00:00
if(!P) { LError("Invalid annotation, cannot contain only a member: %.*s...\n", 8, Line); exit(1); }
if(P[-1] == ':')
{
--P;
}
fprintf(OutFile, "[@%.*s]", (int)(P - Line), Line);
Line = P+1;
}
char RefBuf[256];
char *RunStart = Line;
char *FirstSpace = NULL;
2017-05-01 09:44:22 +00:00
int ConsiderAuthored = 0;
fputc('[', OutFile);
int QuoteID = -1;
for(LinePtr = Line; *LinePtr; ++LinePtr)
{
2017-04-30 01:52:39 +00:00
int ScanBytes = 0;
int TmpQuoteID;
// convert Resource -> ref
2017-05-01 08:26:56 +00:00
// TODO(matt): Gather the whole Resources section, and prompt if we match more than one resource
// TODO(matt): (Maybe) Lookup individual words from the [see Resources] thing
2017-04-30 01:52:39 +00:00
if(LinePtr[0] == ' ' && LinePtr[1] == '[' && sscanf(LinePtr+1, "[see Resources, %255[^]]]%n", RefBuf, &ScanBytes) == 1 && ScanBytes)
{
//printf("Find Resource [%s]\n", RefBuf);
Resource *Res = LookupResource(RefBuf, Line);
if(Res){
fprintf(OutFile,
"%.*s[ref\n site=\"%s\"\n page=\"%s\"\n url=\"%s\"]",
(int)(LinePtr - RunStart),
RunStart,
Res->Site,
Res->Page,
Res->URL);
LinePtr += ScanBytes;
RunStart = LinePtr+1;
}
else
{
2017-05-01 09:44:22 +00:00
LError("WARNING: can't find resource: %s :(\n", RefBuf);
}
}
// convert quotes
2017-04-30 01:52:39 +00:00
else if(LinePtr[0] == ' ' && LinePtr[1] == '(' && sscanf(LinePtr+1, "(!quote %d)%n", &TmpQuoteID, &ScanBytes) == 1 && ScanBytes)
{
2017-04-30 01:52:39 +00:00
QuoteID = TmpQuoteID;
fprintf(OutFile, "%.*s", (int)(LinePtr - RunStart), RunStart);
LinePtr += ScanBytes;
RunStart = LinePtr+1;
}
// Used for getting name for Q:'s
else if(*LinePtr == ' ')
{
if(!FirstSpace)
{
FirstSpace = LinePtr;
2017-05-01 09:44:22 +00:00
ConsiderAuthored = 1;
}
else
{
2017-05-01 09:44:22 +00:00
ConsiderAuthored = 0;
}
}
// Q:
2017-05-01 09:44:22 +00:00
else if(LinePtr[0] == 'Q' && LinePtr[1] == ':' && ConsiderAuthored)
{
LinePtr += 2;
RunStart = LinePtr+1;
fprintf(OutFile, "@%.*s][", (int)(FirstSpace - Line), Line);
}
// Escape stuff
else if(*LinePtr == ']' ||
*LinePtr == '[' ||
*LinePtr == '\\' ||
(LinePtr > Line && LinePtr[-1] == ' ' && strchr(":~@", LinePtr[0]) && !IsAlNum(LinePtr[1])))
{
fprintf(OutFile, "%.*s\\", (int)(LinePtr - RunStart), RunStart);
RunStart = LinePtr;
}
}
// write out remaining text
fprintf(OutFile, "%.*s]", (int)(LinePtr - RunStart), RunStart);
// write out quote node if applicable
if(QuoteID != -1)
{
fprintf(OutFile, "[quote %d]", QuoteID);
}
fputc('\n', OutFile);
}
int ProcessFile(char *InFileName, FILE *InFile, FILE *OutFile)
{
char *Contents = ReadWholeFile(InFile);
char *Ptr;
2017-05-01 08:26:56 +00:00
if(!(getenv("HERO")))
{
printf("Processing [%s]...\n", InFileName);
}
// Resources
ResCount = 0;
if((Ptr = strstr(Contents, "\n## Resources")))
{
char *LinePtrState;
char *LinePtr = strtok_r(Ptr+13, "\r\n", &LinePtrState);
for(; LinePtr; LinePtr = strtok_r(NULL, "\r\n", &LinePtrState))
{
if(LinePtr[0] == '#' && LinePtr[1] == '#')
{
break;
}
if(LinePtr[0] != '*' || LinePtr[1] != ' ')
{
continue;
}
LinePtr += 2;
// Site (seems to be optional [day 205])
{
int Inc = 2;
char *Separator = strstr(LinePtr, " [");
if(!Separator)
{
Separator = strstr(LinePtr, " '[");
Inc = 3;
}
if(!Separator)
{
Separator = strstr(LinePtr, "[");
Inc = 1;
}
if(!Separator) continue;
char *S = (Separator[-1] == ',' || Separator[-1] == ':') ? Separator - 1 : Separator;
*S = 0;
Resources[ResCount].Site = LinePtr;
LinePtr = Separator + Inc;
}
// Page
{
char *Separator = strstr(LinePtr, "](");
if(!Separator) continue;
// needed for day115, maybe others
{
char *S = Separator[-1] == '*' ? Separator - 1 : Separator;
*S = 0;
}
if(*LinePtr == '*') ++LinePtr;
Resources[ResCount].Page = LinePtr;
LinePtr = Separator + 2;
}
// URL
{
char *Separator = strstr(LinePtr, ")");
if(!Separator) continue;
*Separator = 0;
Resources[ResCount].URL = LinePtr;
LinePtr = Separator + 1;
}
2017-05-01 08:26:56 +00:00
if(!(getenv("HERO")))
{
printf("Add Res: %s\n", Resources[ResCount].Site);
}
++ResCount;
}
}
char *Title;
char *VideoID;
enum {
STATE_METADATA,
STATE_MARKERS,
} State = STATE_METADATA;
2017-05-01 08:26:56 +00:00
int LineNumber = 1;
char *LineState;
char *LinePtr = strtok_r(Contents, "\r\n", &LineState);
for(; LinePtr; LinePtr = strtok_r(NULL, "\r\n", &LineState))
{
switch(State)
{
case STATE_METADATA: {
if(strncmp(LinePtr, "title: ", 7) == 0)
{
Title = InPlaceUnescape(LinePtr + 7);
}
else if(strncmp(LinePtr, "videoId: ", 9) == 0)
{
VideoID = InPlaceUnescape(LinePtr + 9);
}
else if(strncmp(LinePtr, "markers:", 8) == 0)
{
fprintf(OutFile, "[video member=cmuratori stream_platform=twitch stream_username=handmade_hero project=hero title=\"%s\" vod_platform=youtube id=%s annotator=Miblo]\n", Title, VideoID);
State = STATE_MARKERS;
}
} break;
case STATE_MARKERS: {
if(strncmp(LinePtr, "---", 3) == 0){
goto Done;
} else {
2017-05-01 08:26:56 +00:00
ProcessAnnotation(LinePtr, LineNumber, OutFile);
}
} break;
}
++LineNumber;
}
Done:
fputs("[/video]\n", OutFile);
free(Contents);
return 1;
}
// Usage: hmmlconv [Files...] [Output Directory]
// Converts every input file to "<Output Directory>/<basename>.hmml". The
// output directory is created if it does not exist. Existing output files
// trigger an overwrite prompt, unless the HERO environment variable is set, in
// which case they are always overwritten.
// Returns 0 when every file was converted (or deliberately skipped at the
// prompt), 1 on usage/directory errors or when any file failed.
int main(int ArgC, char **Args)
{
    if(ArgC < 3)
    {
        fprintf(stderr, "Usage: %s [Files...] [Output Directory]\n", Args[0]);
        return 1;
    }

    const char *OutDirName = Args[ArgC-1];

    // Check if the directory exists and we can write to it.
    // If it doesn't exist, create it.
    if(access(OutDirName, R_OK | W_OK | X_OK) == -1)
    {
        if(errno == ENOENT)
        {
            if(mkdir(OutDirName, 0777) == -1)
            {
                fprintf(stderr, "Couldn't create directory [%s]: %s\n", OutDirName, strerror(errno));
                // Without the directory every output open would fail anyway.
                return 1;
            }
        }
        else
        {
            fprintf(stderr, "Error accessing %s: %s\n", OutDirName, strerror(errno));
            return 1;
        }
    }

    // Loop through all the files and convert them.
    int Errors = 0;
    int AlwaysOverwrite = getenv("HERO") != NULL;

    for(int i = 1; i < ArgC-1; ++i)
    {
        char *FileNameBase = BaseName(Args[i]);

        // Open the input first, so that an unreadable input never causes an
        // existing output file to be truncated.
        FILE *InFile = fopen(Args[i], "r");
        if(!InFile)
        {
            fprintf(stderr, "Error opening %s: %s\n", Args[i], strerror(errno));
            ++Errors;
            continue;
        }

        // +7 == '/' + ".hmml" + terminating NUL
        char OutNameBuf[strlen(OutDirName) + strlen(FileNameBase) + 7];
        snprintf(OutNameBuf, sizeof(OutNameBuf), "%s/%s.hmml", OutDirName, FileNameBase);

        // Use the POSIX open function for a change, and so we can more easily
        // test for the file already existing.
        int OpenFlags = O_CREAT | O_EXCL | O_WRONLY;
        int OutFileDesc = open(OutNameBuf, OpenFlags, 0666);
        if(OutFileDesc == -1 && errno == EEXIST)
        {
            if(!AlwaysOverwrite && !PromptOverwrite(OutNameBuf, &AlwaysOverwrite))
            {
                // The user chose to keep the existing file; not an error.
                fclose(InFile);
                continue;
            }
            OpenFlags = (OpenFlags & ~O_EXCL) | O_TRUNC;
            OutFileDesc = open(OutNameBuf, OpenFlags, 0666);
        }
        if(OutFileDesc == -1)
        {
            fprintf(stderr, "Error opening %s: %s\n", OutNameBuf, strerror(errno));
            fclose(InFile);
            ++Errors;
            continue;
        }

        FILE *OutFile = fdopen(OutFileDesc, "w");
        if(!OutFile)
        {
            fprintf(stderr, "Error opening %s: %s\n", OutNameBuf, strerror(errno));
            close(OutFileDesc); // fdopen failed, so the descriptor is still ours
            fclose(InFile);
            ++Errors;
            continue;
        }

        int Converted = ProcessFile(FileNameBase, InFile, OutFile);
        fclose(InFile);

        // fclose flushes buffered output; a failure here means lost data.
        if(fclose(OutFile) != 0)
        {
            fprintf(stderr, "Error writing %s: %s\n", OutNameBuf, strerror(errno));
            Converted = 0;
        }
        if(!Converted)
        {
            ++Errors;
        }
    }

    if(Errors)
    {
        printf("There were errors processing %d files.\n", Errors);
    }
    return Errors != 0;
}