From 3905b46e75608467278e53cf55e1655b28b15a2e Mon Sep 17 00:00:00 2001 From: Brychan Dempsey Date: Thu, 11 Mar 2021 16:18:05 +1300 Subject: [PATCH] Restructured most of the project, clarified implementations --- Assignment 1/Program.cs | 393 ++++++++++++++++++++++++++++++---------- 1 file changed, 295 insertions(+), 98 deletions(-) diff --git a/Assignment 1/Program.cs b/Assignment 1/Program.cs index 31ff4c2..7a48992 100644 --- a/Assignment 1/Program.cs +++ b/Assignment 1/Program.cs @@ -1,4 +1,5 @@ using System; +using System.Collections.Generic; using System.IO; using System.Text; @@ -6,6 +7,27 @@ namespace Assignment_1 { class Program { + enum VariableFlags + { + Empty = 0, + Reserved = 1 + } + + /// + /// This captures the end-point of each part of an expression (in the stream), to validate the syntax + /// Optionally also captures the parsed string for each expression. + /// + /// For this program, a word is considered to be any non-whitespace value bounded by whitespace or the array boundary. + /// + /// + + static Dictionary> Symbols = new Dictionary> + { + { "SPACE", new Tuple(" ", VariableFlags.Reserved) }, + { "TAB", new Tuple("\t", VariableFlags.Reserved) }, + { "NEWLINE", new Tuple("\n", VariableFlags.Reserved) }, + { "CARRIAGE_RETURN", new Tuple("\r", VariableFlags.Reserved) } + }; static void Main(string[] args) { Console.WriteLine("┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓"); @@ -13,6 +35,9 @@ namespace Assignment_1 Console.WriteLine("┃ Submitted by Brychan Dempsey, 14299890 ┃"); Console.WriteLine("┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛"); MemoryStream sourceStream = new MemoryStream(1024); // Creates a memory stream to retain source while being interpreted. + + + Parser parser = new Parser(); bool dynamicInput = false; // From https://stackoverflow.com/questions/3453220/how-to-detect-if-console-in-stdin-has-been-redirected @@ -28,76 +53,181 @@ namespace Assignment_1 sourceStream.Position = 0; dynamicInput = true; } - parser.FindProgram(sourceStream, dynamicInput); + parser.StartParsing(sourceStream, dynamicInput); } public class Parser { - public int FindProgram(Stream sourceStream, bool dynamicInput = false) + public enum statements { - if (sourceStream.ReadByte() == '{') - { - FindStatement(); - } - else return -1; // Could not find the start of the program + exit, + append, + list, + print, + printlength, + printwords, + printwordcount, + set, + reverse + } + public void StartParsing(Stream source, bool dynamicInput = false) + { + } - void Command_Exit() + #region Function Handling + bool Append(Stream source) { - Environment.Exit(0); - } - - int FindStatement(Stream s) - { - string statement; - if (dynamicInput) + string key; + long advance = FindIdentifier(source, out key); + if (advance < 0) { - Console.Write("Enter a \'Statement\': "); - statement = Console.ReadLine(); + // Error on finding object + return false; } else { - statement = GetNextLine(s); + source.Position = advance; } - - int wordBound = 0; - string nextWord = GetNextWord(statement, out wordBound); - switch (nextWord) + string value; + advance = FindValue(source, out value); + if (advance < 0) { - case "append": - FindIdentifier(); - FindExpression(); - break; - case "list": - break; - case "exit": - Environment.Exit(0); - break; - case "print": - break; - case "printlength": - break; - case "printwords": - break; - case "printwordcount": - break; - case "set": - break; - case "reverse": - break; - default: - break; + // Error on parsing value + return false; } - - // Look for further elements - return 1; + else + { + source.Position = advance; + } + string eol; + FindNextWord(source, out eol); + if (eol[0] != ';') + { + // Expected end-of-statement/end-of-line (;) + return false; + } + if (Symbols[key].Item2 == VariableFlags.Reserved) + { + // Can't assign to reserved items + return false; + } + Symbols[key] = new Tuple(Symbols[key].Item1 + value, Symbols[key].Item2); + return true; } - int FindIdentifier(Stream s) + #endregion + #region Data Handling + // Data Handling + /// + /// Parses the expression from the point in the string + /// + /// + /// + /// + long FindExpression(Stream s, out string expression) { - + // must contain at least one value + string result; + long wordEnd = FindValue(s, out result); + while (true) + { + string nextWord; + wordEnd = FindNextWord(s, out nextWord); + if (wordEnd > 0 && nextWord == "+") + { + s.Position = wordEnd; + } + else + { + break; + } + s.Position = wordEnd; + wordEnd = FindNextWord(s, out nextWord); + result += nextWord; + } + expression = result; + return wordEnd; } + + + // Most atomic unit is 'value': + /// + /// Finds the next value in the stream + /// + /// + /// + /// + long FindValue(Stream s, out string returnedValue) + { + SkipWhitespace(s); + int result = s.ReadByte(); + if (result == '\"') + { + return FindLiteral(s, out returnedValue); + } + else + { + string keyValue; + long t = FindIdentifier(s, out keyValue); + returnedValue = Symbols[keyValue].Item1; + return t; + + } + } + + long FindIdentifier(Stream s, out string returnedKey) + { + string identifier; + long wordEnd = FindNextWord(s, out identifier); + // Lookup the value in the symbol table + try + { + returnedKey = Symbols[identifier].Item1; + } + catch (KeyNotFoundException e) + { + Console.WriteLine("Could not find a defined variable with the name {0}", identifier); + Console.Error.WriteLine(e); + returnedKey = ""; + return -1; + } + return wordEnd; + } + + long FindLiteral(Stream s, out string returnedLiteral) + { + // Is a literal. Now we must parse until we find the end of the literal + string resultLiteral; + long resultPosition = FindNextOccurance(s, (c, s) => + { + if (c == '\"') + { + long pos = s.Position--; + if (GetChar(s) == '\\') + { + // TODO: handle the \\ escape + return false; + } + else + { + return true; + } + } + return false; + }, out resultLiteral); + if (resultPosition > -1) + { + returnedLiteral = resultLiteral; + } + else + { + returnedLiteral = ""; + } + return resultPosition; + } + #endregion } - + /// @@ -107,52 +237,17 @@ namespace Assignment_1 /// static string GetNextLine(Stream s) { - long start = s.Position; - StringBuilder sb = new StringBuilder(); - bool newLineFound = false; - while (!newLineFound) - { - // As UTF-8 allows codepoints to span multiple bytes, reading a single byte as a character will not always give the expected - // value. - // Fortunately, the standard ASCII table is 7-bits long. The 8th bit is used to determine the character size - int readAmount = 0; - int firstChar = s.ReadByte(); - if ((firstChar >> 3) == 0x1E) // 11110xxx implies a 4-byte length character - { - readAmount = 3; - } - else if((firstChar >> 4) == 0xE) // 1110xxxx, 3-byte - { - readAmount = 2; - } - else if ((firstChar >> 5) == 0x6) // 110xxxxx, 2-byte - { - readAmount = 1; - } - - byte[] charBytes = new byte[readAmount + 1]; - charBytes[0] = (byte)firstChar; - for (int i = 1; i < readAmount; i++) - { - int nextChar = s.ReadByte(); - if (nextChar >> 6 != 2) throw new Exception("Character is not a valid UTF-8 code point!"); - charBytes[i] = (byte)nextChar; - } - string converted = Encoding.UTF8.GetString(charBytes); - if (converted == "\r" || converted == "\n") - { - if (s.ReadByte() != '\n') s.Position--; // Return the position if the next character isn't a new line - newLineFound = true; - } - else - { - sb.Append(converted); - } - } - return sb.ToString(); + string nextLine; + FindNextOccurance(s, '\n', out nextLine); + return nextLine; } - - static string GetNextWord(string s, out int wordEndPos) + /// + /// Finds the next word in the string + /// + /// + /// + /// A value <0 if an error occurred, else the position of the end of the word + static long FindNextWord(string s, out string nextWord) { // remove whitespace from the start int wordStart = 0; @@ -170,8 +265,110 @@ namespace Assignment_1 if (char.IsWhiteSpace(s[i])) break; wordEnd = i; } - wordEndPos = wordEnd; - return s.Substring(wordStart, wordEnd); + + nextWord = s.Substring(wordStart, wordEnd); + return wordEnd; + } + /// + /// Finds the end-boundary of the next word in the stream + /// + /// + /// + /// + static long FindNextWord(Stream s, out string nextWord) + { + return FindNextOccurance(s, (c, s) => Char.IsWhiteSpace(c), out nextWord); + } + + + /// + /// Finds and returns the position of the next occurance of the Func returning true. + /// + /// + /// + /// + /// + static long FindNextOccurance(Stream s, Func p, out string result) + { + long start = s.Position; + StringBuilder sb = new StringBuilder(); + bool charFound = false; + while (!charFound) + { + char nextChar = GetChar(s); + if (p(nextChar, s)) + { + /*if (c == '\n') + { + s.Position--; + if (s.ReadByte() != '\r') s.Position--; + // Avoid capturing the carriage return + }*/ + charFound = true; + } + else + { + sb.Append(nextChar); + } + } + + result = sb.ToString(); + long newPosition = s.Position; + s.Position = start; + return newPosition; + } + + /// + /// Finds the next position of the character + /// + /// + /// + /// Captures the string read in searching for the character + /// + static long FindNextOccurance(Stream s, char c, out string result) + { + return FindNextOccurance(s, (streamChar, s) => streamChar == c, out result); + } + + static char GetChar(Stream s) + { + // As UTF-8 allows codepoints to span multiple bytes, reading a single byte as a character will not always give the expected + // value. + // Fortunately, the standard ASCII table is 7-bits long. The 8th bit is used to determine the character size + int readAmount = 0; + int firstChar = s.ReadByte(); + if ((firstChar >> 3) == 0x1E) // 11110xxx implies a 4-byte length character + { + readAmount = 3; + } + else if ((firstChar >> 4) == 0xE) // 1110xxxx, 3-byte + { + readAmount = 2; + } + else if ((firstChar >> 5) == 0x6) // 110xxxxx, 2-byte + { + readAmount = 1; + } + + byte[] charBytes = new byte[readAmount + 1]; + charBytes[0] = (byte)firstChar; + for (int i = 1; i < readAmount; i++) + { + int nextChar = s.ReadByte(); + if (nextChar >> 6 != 2) throw new Exception("Character is not a valid UTF-8 code point!"); + charBytes[i] = (byte)nextChar; + } + string converted = Encoding.UTF8.GetString(charBytes); + return converted[0]; + } + + static void SkipWhitespace(Stream s) + { + int readByte = s.ReadByte(); + while(readByte > -1 && char.IsWhiteSpace((char)readByte)) + { + readByte = s.ReadByte(); + } } } }