#if 0
cc "$0" -g -std=c99 -D_POSIX_SOURCE -o "${0%.c}"
exit
#endif

/*
 * hmmlconv/hmmlconv.c -- prototype old annotation -> hmml converter.
 *
 * Provenance (git patch header this source was recovered from):
 *   From 439d21a2b4b1643d7342ddbd489ff72fc13bf2b2 Mon Sep 17 00:00:00 2001
 *   From: Alex Baines
 *   Date: Sat, 29 Apr 2017 16:32:29 +0100
 *   Subject: [PATCH] Prototype old annotation -> hmml converter
 *   diff --git a/hmmlconv/hmmlconv.c b/hmmlconv/hmmlconv.c
 *   new file mode 100644, index 0000000..02d694f
 *
 * The header names on the #include lines below were missing from the
 * recovered copy; they are reconstructed from the trailing comments and from
 * the functions this file calls -- confirm against the original commit.
 *
 * Usage: hmmlconv [Files...] [Output Directory]
 * Each input file is converted to <Output Directory>/<basename>.hmml.
 *
 * The block at the very top lets the file be run through a shell to compile
 * itself; it must stay the first thing in the file.
 */

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <unistd.h>    // access
#include <string.h>    // strerror
#include <fcntl.h>     // open
#include <sys/stat.h>  // open, mkdir
#include <sys/types.h> // open, mkdir
#include <errno.h>

// One entry of an input file's "## Resources" list. All three strings point
// into the buffer of the file currently being processed.
typedef struct
{
    char *Site;
    char *Page;
    char *URL;
} Resource;

enum { MAX_RESOURCES = 64 };

// Resource table for the file currently being processed (see ProcessFile).
static Resource Resources[MAX_RESOURCES];
static int ResCount;

// Removes directories from a path, e.g.: /path/to/thing -> thing
// Returns a pointer into Path; nothing is allocated.
char *BaseName(char *Path)
{
    char *LastSlash = strrchr(Path, '/');
    return LastSlash ? LastSlash + 1 : Path;
}

// Consumes stdin up to and including the next newline, stopping early at EOF.
static void DiscardRestOfLine(void)
{
    int C;
    do
    {
        C = getchar();
    } while(C != '\n' && C != EOF);
}

// Asks on stdout whether the existing file Name may be overwritten and reads
// the answer from stdin.
//
// Returns 1 for Yes/Always and 0 for No. "Always" additionally stores 1 in
// *Always. "eXit" or end of input terminates the program with status 0.
// Any other answer repeats the question.
int PromptOverwrite(char *Name, int* Always)
{
    for(;;)
    {
        printf("The file '%s' exists. Overwrite? [Yes|No|Always|eXit]\n> ", Name);
        fflush(stdout);

        int C = getchar();
        switch(C)
        {
            case 'a':
            case 'A':
                *Always = 1;
                /* fallthrough: "Always" also means "Yes" for this file */
            case 'y':
            case 'Y':
                DiscardRestOfLine();
                return 1;

            case 'n':
            case 'N':
                DiscardRestOfLine();
                return 0;

            case 'x':
            case 'X':
            case EOF:
                exit(0);

            default:
                break;
        }

        printf("Unknown option '%c'\n", C);

        // The answer may have been longer than one character; drop the rest.
        if(C != '\n')
        {
            DiscardRestOfLine();
        }
    }
}

// Reads all of File into a freshly malloc'd, NUL-terminated buffer that the
// caller must free. File must be seekable. Any failure prints a message and
// terminates the program with status 1.
char *ReadWholeFile(FILE *File)
{
    long Size = -1;
    if(fseek(File, 0, SEEK_END) == 0)
    {
        Size = ftell(File);
    }

    if(Size < 0 || fseek(File, 0, SEEK_SET) != 0)
    {
        perror("fseek/ftell");
        exit(1);
    }

    char *Buffer = malloc((size_t)Size + 1);
    if(!Buffer)
    {
        perror("malloc");
        exit(1);
    }

    size_t BytesRead = fread(Buffer, 1, (size_t)Size, File);
    if(BytesRead < (size_t)Size && ferror(File))
    {
        perror("fread");
        exit(1);
    }

    // Terminate after what was actually read, so a short read never exposes
    // uninitialised bytes.
    Buffer[BytesRead] = 0;

    return Buffer;
}

// Advances *Ptr past leading spaces and control characters. Bytes are
// compared as unsigned so that bytes >= 0x80 (e.g. UTF-8) are not skipped.
void SkipWhitespace(char **Ptr)
{
    while(**Ptr && (unsigned char)**Ptr <= ' ')
    {
        ++*Ptr;
    }
}

// Unescapes a double-quoted string in place.
//
// An optional leading '"' is skipped; the text up to the next unescaped '"'
// (or the end of the string) is copied down over itself with each backslash
// dropped and the character after it kept. Returns a pointer to the
// NUL-terminated result, which lives inside In's buffer.
char *InPlaceUnescape(char *In)
{
    if(*In == '"') ++In;
    char *Result = In;
    char *Out = In;

    while(*In && *In != '"')
    {
        // Only treat the backslash as an escape when something follows it; a
        // lone trailing backslash is kept rather than stepping past the NUL.
        if(In[0] == '\\' && In[1])
        {
            ++In;
        }

        *Out++ = *In++;
    }

    *Out = 0;
    return Result;
}

// Returns non-zero if C is an ASCII letter or digit.
int IsAlNum(char C)
{
    return (C >= '0' && C <= '9') || (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z');
}

// Finds the resource whose Site or Page equals Tag.
//
// When several entries match, the first one whose Page text also occurs in
// Line wins; otherwise the last matching entry is returned. Returns NULL if
// nothing matches.
Resource *LookupResource(char *Tag, char *Line)
{
    Resource *Res = NULL;

    for(int i = 0; i < ResCount; ++i)
    {
        Resource *Candidate = &Resources[i];
        if(strcmp(Tag, Candidate->Site) == 0 || strcmp(Tag, Candidate->Page) == 0)
        {
            Res = Candidate;
            if(strstr(Line, Res->Page))
            {
                break;
            }
        }
    }

    return Res;
}

// Reports an annotation line that does not have the expected shape and
// terminates the program with status 1.
static void MalformedAnnotation(const char *Line, const char *Why)
{
    fprintf(stderr, "ERROR: malformed annotation (%s): %s\n", Why, Line);
    exit(1);
}

// Converts one marker line of the form
//     "<timestamp>": "<text>"
// into one line of output written to OutFile:
//     [<timestamp>][@<author>][<text>][quote <id>]
// where the author and quote nodes only appear when the text has them.
//
// Line is modified in place. The Resources/ResCount parameters are not used:
// LookupResource reads the file-scope table of the same name. A line that
// does not have the expected shape terminates the program.
void ProcessAnnotation(Resource* Resources, size_t ResCount, char *Line, FILE *OutFile)
{
    (void)Resources;
    (void)ResCount;

    SkipWhitespace(&Line);

    // Validate the whole `"<timestamp>": "` prefix before writing anything.
    if(*Line != '"')
    {
        MalformedAnnotation(Line, "expected opening quote");
    }

    char *Timestamp = Line + 1;
    char *TimestampEnd = strchr(Timestamp, '"');
    if(!TimestampEnd)
    {
        MalformedAnnotation(Line, "unterminated timestamp");
    }

    // The comparisons short-circuit, so nothing past the terminator is read.
    if(TimestampEnd[1] != ':' || TimestampEnd[2] != ' ' || TimestampEnd[3] != '"')
    {
        MalformedAnnotation(Line, "expected `: \"` after timestamp");
    }

    fprintf(OutFile, "[%.*s]", (int)(TimestampEnd - Timestamp), Timestamp);

    // TimestampEnd + 3 is the opening quote of the text.
    Line = InPlaceUnescape(TimestampEnd + 3);

    // convert author
    if(Line[0] == '@')
    {
        char *P = strchr(Line, ' ');
        if(!P)
        {
            MalformedAnnotation(Line, "expected a space after the @author");
        }

        if(P[-1] == ':')
        {
            --P;
        }

        // NOTE(review): Line still starts with '@' here, so this writes
        // "[@@name]". Confirm whether the doubled '@' is intended.
        fprintf(OutFile, "[@%.*s]", (int)(P - Line), Line);
        Line = P+1;
    }

    char RefBuf[256];
    char *LinePtr;
    char *RunStart = Line;      // start of the text not yet written out
    char *FirstSpace = NULL;
    int ConsiderQuote = 0;      // set only right after the first space
    int QuoteID = -1;

    fputc('[', OutFile);

    for(LinePtr = Line; *LinePtr; ++LinePtr)
    {
        // Stays negative unless sscanf reaches the %n directive, i.e. unless
        // the whole pattern including its closing bracket matched.
        int ScanBytes = -1;
        int ParsedQuoteID = -1;

        // convert Resource -> ref
        if(LinePtr[0] == ' ' && LinePtr[1] == '[' &&
           sscanf(LinePtr+1, "[see Resources, %255[^]]]%n", RefBuf, &ScanBytes) == 1 &&
           ScanBytes > 0)
        {
            Resource *Res = LookupResource(RefBuf, Line);
            if(Res)
            {
                fprintf(OutFile,
                        "%.*s[ref\n    site=\"%s\"\n    page=\"%s\"\n    url=\"%s\"]",
                        (int)(LinePtr - RunStart),
                        RunStart,
                        Res->Site,
                        Res->Page,
                        Res->URL);

                // Leaves LinePtr on the closing ']' of the reference.
                LinePtr += ScanBytes;
                RunStart = LinePtr+1;
            }
            else
            {
                fprintf(stderr, "WARNING: can't find resource [%s] :(\n", RefBuf);
            }
        }

        // convert quotes
        else if(LinePtr[0] == ' ' && LinePtr[1] == '(' &&
                sscanf(LinePtr+1, "(!quote %d)%n", &ParsedQuoteID, &ScanBytes) == 1 &&
                ScanBytes > 0)
        {
            // Commit the id only once the whole "(!quote N)" matched.
            QuoteID = ParsedQuoteID;
            fprintf(OutFile, "%.*s", (int)(LinePtr - RunStart), RunStart);
            LinePtr += ScanBytes;
            RunStart = LinePtr+1;
        }

        // Used for getting name for Q:'s
        else if(*LinePtr == ' ')
        {
            if(!FirstSpace)
            {
                FirstSpace = LinePtr;
                ConsiderQuote = 1;
            }
            else
            {
                ConsiderQuote = 0;
            }
        }

        // Q: -- the text before the first space becomes an [@name] node and
        // is dropped from the annotation text.
        else if(LinePtr[0] == 'Q' && LinePtr[1] == ':' && ConsiderQuote)
        {
            fprintf(OutFile, "@%.*s][", (int)(FirstSpace - Line), Line);

            LinePtr += 2;
            if(!*LinePtr)
            {
                // "Q:" was the end of the text; nothing is left to skip.
                RunStart = LinePtr;
                break;
            }

            // Skip the single character following "Q:" as well.
            RunStart = LinePtr+1;
        }

        // Escape stuff
        else if(*LinePtr == ']' ||
                (LinePtr[0] == '\\' && LinePtr[1] != '"') ||
                (LinePtr > Line && LinePtr[-1] == ' ' && strchr(":~@", LinePtr[0]) && !IsAlNum(LinePtr[1])))
        {
            // Flush the pending run, then a backslash; the character itself
            // starts the next run.
            fprintf(OutFile, "%.*s\\", (int)(LinePtr - RunStart), RunStart);
            RunStart = LinePtr;
        }
    }

    // write out remaining text
    fprintf(OutFile, "%.*s]", (int)(LinePtr - RunStart), RunStart);

    // write out quote node if applicable
    if(QuoteID != -1)
    {
        fprintf(OutFile, "[quote %d]", QuoteID);
    }

    fputc('\n', OutFile);
}

// Parses the text of one resource bullet (what follows "* "), of the rough
// form `Site [Page](URL)`, splitting it in place by writing terminators into
// Text. Returns 1 and fills *Out on success, 0 if a separator is missing
// (*Out may then be partially written).
static int ParseResourceLine(char *Text, Resource *Out)
{
    // Site (seems to be optional [day 205])
    {
        int Inc = 2;
        char *Separator = strstr(Text, " [");
        if(!Separator)
        {
            Separator = strstr(Text, " '[");
            Inc = 3;
        }

        if(!Separator)
        {
            Separator = strstr(Text, "[");
            Inc = 1;
        }

        if(!Separator) return 0;

        // Drop a ',' or ':' that directly precedes the separator.
        char *End = Separator;
        if(Separator > Text && (Separator[-1] == ',' || Separator[-1] == ':'))
        {
            End = Separator - 1;
        }
        *End = 0;

        Out->Site = Text;
        Text = Separator + Inc;
    }

    // Page
    {
        char *Separator = strstr(Text, "](");
        if(!Separator) return 0;

        // needed for day115, maybe others: strip a '*' on either side
        char *End = (Separator > Text && Separator[-1] == '*') ? Separator - 1 : Separator;
        *End = 0;
        if(*Text == '*') ++Text;

        Out->Page = Text;
        Text = Separator + 2;
    }

    // URL
    {
        char *Separator = strchr(Text, ')');
        if(!Separator) return 0;
        *Separator = 0;

        Out->URL = Text;
    }

    return 1;
}

// Converts one annotation file.
//
// Reads all of InFile, fills the resource table from its "## Resources"
// section (if any), then walks the file line by line: "title: " and
// "videoId: " lines are remembered, a "markers:" line writes the [video ...]
// header, and every following line up to one starting with "---" is passed
// to ProcessAnnotation. "[/video]" is always written at the end.
//
// InFileName is only used for the progress message.
// Returns 1 on success, 0 if writing to OutFile failed.
int ProcessFile(char *InFileName, FILE *InFile, FILE *OutFile)
{
    static const char ResourcesHeading[] = "\n## Resources";

    char *Contents = ReadWholeFile(InFile);
    char *Ptr;

    printf("Processing [%s]...\n", InFileName);

    // Resources
    // This pass writes terminators into the tail of Contents, so it relies on
    // the marker list (ended by "---") coming before this section.
    ResCount = 0;
    if((Ptr = strstr(Contents, ResourcesHeading)))
    {
        char *LinePtrState;
        char *LinePtr = strtok_r(Ptr + sizeof(ResourcesHeading) - 1, "\r\n", &LinePtrState);

        for(; LinePtr; LinePtr = strtok_r(NULL, "\r\n", &LinePtrState))
        {
            // The next heading ends the resource list.
            if(LinePtr[0] == '#' && LinePtr[1] == '#')
            {
                break;
            }

            // Only "* " bullets are resources.
            if(LinePtr[0] != '*' || LinePtr[1] != ' ')
            {
                continue;
            }

            if(ResCount >= MAX_RESOURCES)
            {
                fprintf(stderr, "WARNING: more than %d resources, ignoring the rest\n", MAX_RESOURCES);
                break;
            }

            if(!ParseResourceLine(LinePtr + 2, &Resources[ResCount]))
            {
                continue;
            }

            printf("Add Res: %s\n", Resources[ResCount].Site);

            ++ResCount;
        }
    }

    // Defaults keep the header well-defined if "markers:" comes first.
    char *Title = "";
    char *VideoID = "";
    int InMarkers = 0;

    char *LineState;
    char *LinePtr = strtok_r(Contents, "\r\n", &LineState);

    for(; LinePtr; LinePtr = strtok_r(NULL, "\r\n", &LineState))
    {
        if(!InMarkers)
        {
            // Metadata: anything unrecognised is ignored.
            if(strncmp(LinePtr, "title: ", 7) == 0)
            {
                Title = InPlaceUnescape(LinePtr + 7);
            }
            else if(strncmp(LinePtr, "videoId: ", 9) == 0)
            {
                VideoID = InPlaceUnescape(LinePtr + 9);
            }
            else if(strncmp(LinePtr, "markers:", 8) == 0)
            {
                fprintf(OutFile, "[video member=cmuratori project=hero title=\"%s\" platform=youtube id=%s annotator=Miblo]\n", Title, VideoID);
                InMarkers = 1;
            }
        }
        else if(strncmp(LinePtr, "---", 3) == 0)
        {
            break;
        }
        else
        {
            ProcessAnnotation(Resources, ResCount, LinePtr, OutFile);
        }
    }

    fputs("[/video]\n", OutFile);

    // The table points into Contents; empty it before the buffer goes away.
    ResCount = 0;
    free(Contents);

    return ferror(OutFile) ? 0 : 1;
}

// Opens OutName for writing, asking before replacing an existing file unless
// *AlwaysOverwrite is already set. Returns NULL if the file could not be
// opened or the user declined to overwrite it.
static FILE *OpenOutputFile(char *OutName, int *AlwaysOverwrite)
{
    // Use the POSIX open function for a change, and so we can more easily
    // test for the file already existing.
    int OutFileDesc = open(OutName, O_CREAT | O_EXCL | O_WRONLY, 0666);

    if(OutFileDesc == -1 && errno == EEXIST)
    {
        if(!*AlwaysOverwrite && !PromptOverwrite(OutName, AlwaysOverwrite))
        {
            return NULL;
        }

        // Reopen without O_CREAT *and* without O_EXCL: POSIX leaves O_EXCL
        // without O_CREAT undefined.
        OutFileDesc = open(OutName, O_WRONLY | O_TRUNC);
    }

    if(OutFileDesc == -1)
    {
        perror("open");
        return NULL;
    }

    FILE *OutFile = fdopen(OutFileDesc, "w");
    if(!OutFile)
    {
        fprintf(stderr, "Error opening %s: %s\n", OutName, strerror(errno));
        close(OutFileDesc);
    }

    return OutFile;
}

// Entry point: Args[1..ArgC-2] are input files, Args[ArgC-1] is the output
// directory (created if missing). Returns 0 if every file converted cleanly,
// 1 otherwise. A file the user declines to overwrite counts as an error.
int main(int ArgC, char **Args)
{
    if(ArgC < 3)
    {
        fprintf(stderr, "Usage: %s [Files...] [Output Directory]\n", Args[0]);
        return 1;
    }

    const char *OutDirName = Args[ArgC-1];

    // Check if the directory exists and we can write to it.
    // If it doesn't exist, create it.

    if(access(OutDirName, R_OK | W_OK | X_OK) == -1)
    {
        if(errno != ENOENT)
        {
            fprintf(stderr, "Error accessing %s: %s\n", OutDirName, strerror(errno));
            return 1;
        }

        if(mkdir(OutDirName, 0777) == -1)
        {
            fprintf(stderr, "Couldn't create directory [%s]: %s\n", OutDirName, strerror(errno));
            return 1;
        }
    }

    // Loop through all the files and convert them.

    int Errors = 0;
    int AlwaysOverwrite = 0;

    for(int i = 1; i < ArgC-1; ++i)
    {
        // Open the input first so an unreadable input never truncates an
        // existing output file.
        FILE *InFile = fopen(Args[i], "r");
        if(!InFile)
        {
            fprintf(stderr, "Error opening %s: %s\n", Args[i], strerror(errno));
            ++Errors;
            continue;
        }

        char *FileNameBase = BaseName(Args[i]);

        // sizeof counts the '/', ".hmml" and the terminator.
        size_t OutNameSize = strlen(OutDirName) + strlen(FileNameBase) + sizeof("/.hmml");
        char *OutName = malloc(OutNameSize);
        if(!OutName)
        {
            perror("malloc");
            fclose(InFile);
            return 1;
        }
        snprintf(OutName, OutNameSize, "%s/%s.hmml", OutDirName, FileNameBase);

        FILE *OutFile = OpenOutputFile(OutName, &AlwaysOverwrite);
        int Ok = OutFile && ProcessFile(FileNameBase, InFile, OutFile);

        fclose(InFile);

        // fclose flushes, so this is where late write errors show up.
        if(OutFile && fclose(OutFile) != 0)
        {
            fprintf(stderr, "Error writing %s: %s\n", OutName, strerror(errno));
            Ok = 0;
        }

        if(!Ok)
        {
            ++Errors;
        }

        free(OutName);
    }

    if(Errors)
    {
        printf("There were errors processing %d files.\n", Errors);
    }

    return Errors != 0;
}