using System;
using System.Collections.Generic;
using System.IO;
using System.Text;

namespace Assignment_1
{
    /// <summary>
    /// Console entry point and low-level stream scanning helpers for the assignment interpreter.
    /// </summary>
    /// <remarks>
    /// For this program, a word is considered to be any non-whitespace value bounded by
    /// whitespace or the array (stream/string) boundary.
    /// Unless stated otherwise, the Find* helpers leave the stream position where they found it
    /// and return the position at which the matched item ends, so a caller can validate the
    /// syntax first and then advance explicitly.
    /// </remarks>
    class Program
    {
        /// <summary>
        /// Bit flags attached to each symbol table entry.
        /// </summary>
        [Flags]
        enum VariableFlags
        {
            Empty = 0,
            Reserved = 1,
            NoPrint = 2
        }

        /// <summary>
        /// The symbol table: maps a symbol name to its current string value and its flags.
        /// It is pre-populated with the reserved whitespace constants.
        /// </summary>
        static Dictionary<string, Tuple<string, VariableFlags>> Symbols = new Dictionary<string, Tuple<string, VariableFlags>>
        {
            { "SPACE", new Tuple<string, VariableFlags>(" ", VariableFlags.Reserved | VariableFlags.NoPrint) },
            { "TAB", new Tuple<string, VariableFlags>("\t", VariableFlags.Reserved | VariableFlags.NoPrint) },
            { "NEWLINE", new Tuple<string, VariableFlags>("\n", VariableFlags.Reserved | VariableFlags.NoPrint) },
            { "CARRIAGE_RETURN", new Tuple<string, VariableFlags>("\r", VariableFlags.Reserved | VariableFlags.NoPrint) }
        };

        /// <summary>
        /// Prints the banner, loads the source text into a memory stream and hands it to the parser.
        /// </summary>
        /// <param name="args">Command line arguments (unused).</param>
        static void Main(string[] args)
        {
            Console.WriteLine("┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓");
            Console.WriteLine("┃ 159.341 2021 Semester 1, Assignment 1 ┃");
            Console.WriteLine("┃ Submitted by Brychan Dempsey, 14299890 ┃");
            Console.WriteLine("┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛");

            // The memory stream retains the source while it is being interpreted.
            using (MemoryStream sourceStream = new MemoryStream(1024))
            {
                Parser parser = new Parser();
                bool dynamicInput = false;
                byte[] initialSource;

                // From https://stackoverflow.com/questions/3453220/how-to-detect-if-console-in-stdin-has-been-redirected
                // Reading from pipes is equivalent to reading user input, though the input is redirected.
                if (Console.IsInputRedirected)
                {
                    initialSource = Encoding.UTF8.GetBytes(Console.In.ReadToEnd());
                }
                else
                {
                    initialSource = Encoding.UTF8.GetBytes("{\r\n");
                    dynamicInput = true;
                }

                sourceStream.Write(initialSource, 0, initialSource.Length);
                sourceStream.Position = 0;
                parser.StartParsing(sourceStream, dynamicInput);
            }
        }

        /// <summary>
        /// Interprets statements read from a source stream against the shared symbol table.
        /// </summary>
        public class Parser
        {
            /// <summary>
            /// The statement keywords known to the parser.
            /// </summary>
            public enum statements
            {
                exit,
                append,
                list,
                print,
                printlength,
                printwords,
                printwordcount,
                set,
                reverse
            }

            /// <summary>
            /// Entry point for interpreting <paramref name="source"/>.
            /// </summary>
            /// <param name="source">The stream holding the program text.</param>
            /// <param name="dynamicInput">True when Main did not read the source from redirected input.</param>
            public void StartParsing(Stream source, bool dynamicInput = false)
            {
                // NOTE(review): statement dispatch has not been implemented yet; this is still a stub.
            }

            #region Function Handling

            /// <summary>
            /// Handles the operands of an append statement: identifier, value, then ';'.
            /// On success the value is appended to the identifier's current value.
            /// </summary>
            /// <param name="source">Stream positioned at the start of the identifier.</param>
            /// <returns>True if the statement was valid and applied; otherwise false.</returns>
            bool Append(Stream source)
            {
                string key;
                long advance = FindIdentifier(source, out key);
                if (advance < 0)
                {
                    // Error on finding object
                    return false;
                }
                source.Position = advance;

                string value;
                advance = FindValue(source, out value);
                if (advance < 0)
                {
                    // Error on parsing value
                    return false;
                }
                source.Position = advance;

                string eol;
                FindNextWord(source, out eol);
                if (eol.Length == 0 || eol[0] != ';')
                {
                    // Expected end-of-statement/end-of-line (;)
                    return false;
                }

                Tuple<string, VariableFlags> current = Symbols[key];
                if (current.Item2.HasFlag(VariableFlags.Reserved))
                {
                    // Can't assign to reserved items. HasFlag is required because reserved
                    // entries may carry further flags (e.g. Reserved | NoPrint).
                    return false;
                }

                Symbols[key] = new Tuple<string, VariableFlags>(current.Item1 + value, current.Item2);
                return true;
            }

            /// <summary>
            /// Prints the symbol table as a three column box: symbol, value and flags (as 8 binary digits).
            /// </summary>
            /// <returns>Always true.</returns>
            bool List()
            {
                string nameRule = new string('─', 15);
                string valueRule = new string('─', 25);
                string flagRule = new string('─', 9);
                string rowSeparator = "├" + nameRule + "┼" + valueRule + "┼" + flagRule + "┤";

                Console.WriteLine("┌" + nameRule + "┬" + valueRule + "┬" + flagRule + "┐");
                // Column alignment uses the ',' component of a format item; ':' would be a format specifier.
                Console.WriteLine("│{0,-15}│{1,-25}│{2,9}│", "Symbol", "Value", "Flags");

                // NOTE(review): values holding control characters (the reserved whitespace symbols)
                // are written verbatim and will disturb the table layout.
                foreach (var item in Symbols)
                {
                    Console.WriteLine(rowSeparator);
                    Console.WriteLine("│{0,-15}│{1,-25}│{2,9}│",
                        item.Key,
                        item.Value.Item1,
                        Convert.ToString((byte)item.Value.Item2, 2).PadLeft(8, '0'));
                }

                Console.WriteLine("└" + nameRule + "┴" + valueRule + "┴" + flagRule + "┘");
                return true;
            }

            #endregion

            #region Data Handling

            /// <summary>
            /// Parses an expression: a value followed by any number of "+ value" pairs.
            /// The operand values are concatenated in order.
            /// </summary>
            /// <param name="s">Stream positioned at the start of the expression.</param>
            /// <param name="expression">The concatenated result, or "" on failure.</param>
            /// <returns>The position at which the expression ends, or a value &lt;0 on error.</returns>
            long FindExpression(Stream s, out string expression)
            {
                long start = s.Position;
                expression = "";
                try
                {
                    // Must contain at least one value.
                    string result;
                    long end = FindValue(s, out result);
                    if (end < 0)
                    {
                        return end;
                    }

                    while (true)
                    {
                        s.Position = end;
                        string op;
                        long opEnd = FindNextWord(s, out op);
                        if (opEnd < 0 || op != "+")
                        {
                            // Not a continuation; the expression ended at the previous value.
                            break;
                        }

                        s.Position = opEnd;
                        string operand;
                        long operandEnd = FindValue(s, out operand);
                        if (operandEnd < 0)
                        {
                            // A '+' must be followed by a value.
                            return operandEnd;
                        }

                        result += operand;
                        end = operandEnd;
                    }

                    expression = result;
                    return end;
                }
                finally
                {
                    s.Position = start;
                }
            }

            /// <summary>
            /// Finds the next value in the stream. The most atomic unit is 'value': either a
            /// double-quoted literal or the name of a symbol in the symbol table.
            /// </summary>
            /// <param name="s">Stream positioned at, or in whitespace before, the value.</param>
            /// <param name="returnedValue">The literal text or the symbol's value; "" on failure.</param>
            /// <returns>The position at which the value ends, or a value &lt;0 on error.</returns>
            long FindValue(Stream s, out string returnedValue)
            {
                long start = s.Position;
                try
                {
                    SkipWhitespace(s);
                    long valueStart = s.Position;
                    if (s.ReadByte() == '\"')
                    {
                        return FindLiteral(s, out returnedValue);
                    }

                    // Not a literal: un-read the byte so the identifier keeps its first character.
                    s.Position = valueStart;
                    string key;
                    long end = FindIdentifier(s, out key);
                    returnedValue = end < 0 ? "" : Symbols[key].Item1;
                    return end;
                }
                finally
                {
                    s.Position = start;
                }
            }

            /// <summary>
            /// Reads the next word and checks that it names an entry of the symbol table.
            /// </summary>
            /// <param name="s">Stream positioned at, or in whitespace before, the identifier.</param>
            /// <param name="returnedKey">The symbol name, or "" if it is not defined.</param>
            /// <returns>The position at which the identifier ends, or -1 if it is not defined.</returns>
            long FindIdentifier(Stream s, out string returnedKey)
            {
                string identifier;
                long wordEnd = FindNextWord(s, out identifier);

                // Lookup the name in the symbol table
                if (wordEnd < 0 || !Symbols.ContainsKey(identifier))
                {
                    Console.WriteLine("Could not find a defined variable with the name {0}", identifier);
                    returnedKey = "";
                    return -1;
                }

                returnedKey = identifier;
                return wordEnd;
            }

            /// <summary>
            /// Reads a literal whose opening quote has already been consumed, up to the closing quote.
            /// A backslash escapes the character after it, so \" does not end the literal and \\ does
            /// not escape a following quote.
            /// </summary>
            /// <param name="s">Stream positioned just after the opening quote.</param>
            /// <param name="returnedLiteral">
            /// The text between the quotes, or "" on failure. Escape sequences are returned as written.
            /// </param>
            /// <returns>The position just after the closing quote, or -1 if the literal is unterminated.</returns>
            long FindLiteral(Stream s, out string returnedLiteral)
            {
                // Is a literal. Now we must parse until we find the end of the literal
                bool escaped = false;
                string resultLiteral;
                long resultPosition = FindNextOccurance(s, (c, stream) =>
                {
                    if (escaped)
                    {
                        escaped = false;
                        return false;
                    }
                    if (c == '\\')
                    {
                        escaped = true;
                        return false;
                    }
                    return c == '\"';
                }, out resultLiteral);

                returnedLiteral = resultPosition > -1 ? resultLiteral : "";
                return resultPosition;
            }

            #endregion
        }

        /// <summary>
        /// Reads the stream as a UTF-8 encoded string until the next occurance of '\n' or '\r\n'
        /// (consuming, and excluded), or until the end of the stream.
        /// </summary>
        /// <param name="s">The stream to read from; it is advanced past the line terminator.</param>
        /// <returns>The line without its terminator.</returns>
        static string GetNextLine(Stream s)
        {
            string nextLine;
            bool terminated;
            long lineEnd = ScanUntil(s, (c, stream) => c == '\n', out nextLine, out terminated);
            s.Position = lineEnd;

            if (nextLine.EndsWith("\r", StringComparison.Ordinal))
            {
                // Avoid capturing the carriage return of a '\r\n' pair
                nextLine = nextLine.Substring(0, nextLine.Length - 1);
            }
            return nextLine;
        }

        /// <summary>
        /// Finds the next word in the string
        /// </summary>
        /// <param name="s">The text to search.</param>
        /// <param name="nextWord">The first word found, or "" if there is none.</param>
        /// <returns>A value &lt;0 if no word was found, else the index just past the end of the word</returns>
        static long FindNextWord(string s, out string nextWord)
        {
            nextWord = "";
            if (string.IsNullOrEmpty(s))
            {
                return -1;
            }

            // remove whitespace from the start
            int wordStart = 0;
            while (wordStart < s.Length && char.IsWhiteSpace(s[wordStart]))
            {
                wordStart++;
            }
            if (wordStart == s.Length)
            {
                return -1;
            }

            int wordEnd = wordStart;
            while (wordEnd < s.Length && !char.IsWhiteSpace(s[wordEnd]))
            {
                wordEnd++;
            }

            nextWord = s.Substring(wordStart, wordEnd - wordStart);
            return wordEnd;
        }

        /// <summary>
        /// Finds the end-boundary of the next word in the stream, skipping leading whitespace.
        /// A word ends at whitespace or at the end of the stream.
        /// </summary>
        /// <param name="s">The stream to search; its position is left unchanged.</param>
        /// <param name="nextWord">The word found, or "" if the stream holds no further word.</param>
        /// <returns>
        /// The position just past the whitespace character that ended the word (or the end of the
        /// stream), or -1 if no word was found.
        /// </returns>
        static long FindNextWord(Stream s, out string nextWord)
        {
            long start = s.Position;
            try
            {
                SkipWhitespace(s);
                bool terminated;
                long wordEnd = ScanUntil(s, (c, stream) => char.IsWhiteSpace(c), out nextWord, out terminated);
                return nextWord.Length > 0 ? wordEnd : -1;
            }
            finally
            {
                s.Position = start;
            }
        }

        /// <summary>
        /// Finds and returns the position of the next occurance of the Func returning true.
        /// </summary>
        /// <param name="s">The stream to search; its position is left unchanged.</param>
        /// <param name="p">Predicate called with each decoded character and the stream.</param>
        /// <param name="result">The text read before the match (or up to the end of the stream).</param>
        /// <returns>The position just past the matching character, or -1 if the stream ended first.</returns>
        static long FindNextOccurance(Stream s, Func<char, Stream, bool> p, out string result)
        {
            bool found;
            long matchEnd = ScanUntil(s, p, out result, out found);
            return found ? matchEnd : -1;
        }

        /// <summary>
        /// Finds the next position of the character
        /// </summary>
        /// <param name="s">The stream to search; its position is left unchanged.</param>
        /// <param name="c">The character to look for.</param>
        /// <param name="result">Captures the string read in searching for the character</param>
        /// <returns>The position just past the character, or -1 if the stream ended first.</returns>
        static long FindNextOccurance(Stream s, char c, out string result)
        {
            return FindNextOccurance(s, (streamChar, stream) => streamChar == c, out result);
        }

        /// <summary>
        /// Shared scanning loop: decodes characters until the predicate matches or the stream ends.
        /// </summary>
        /// <param name="s">The stream to scan; its position is restored before returning.</param>
        /// <param name="p">Predicate called with each decoded character and the stream.</param>
        /// <param name="result">The text read before the scan stopped (the match is excluded).</param>
        /// <param name="found">True if the predicate matched, false if the stream ended first.</param>
        /// <returns>The position at which the scan stopped.</returns>
        static long ScanUntil(Stream s, Func<char, Stream, bool> p, out string result, out bool found)
        {
            long start = s.Position;
            try
            {
                StringBuilder sb = new StringBuilder();
                found = false;
                string text;
                while (TryReadCodePoint(s, out text))
                {
                    if (p(text[0], s))
                    {
                        found = true;
                        break;
                    }
                    sb.Append(text);
                }

                result = sb.ToString();
                return s.Position;
            }
            finally
            {
                s.Position = start;
            }
        }

        /// <summary>
        /// Reads one UTF-8 encoded character from the stream.
        /// </summary>
        /// <param name="s">The stream to read from; it is advanced past the character.</param>
        /// <returns>
        /// The decoded character. For a 4-byte sequence only the first UTF-16 code unit is returned.
        /// </returns>
        /// <exception cref="EndOfStreamException">The stream has no more data.</exception>
        /// <exception cref="InvalidDataException">A continuation byte is missing or malformed.</exception>
        static char GetChar(Stream s)
        {
            string text;
            if (!TryReadCodePoint(s, out text))
            {
                throw new EndOfStreamException("No character is available at the end of the stream.");
            }
            return text[0];
        }

        /// <summary>
        /// Reads the bytes of one UTF-8 sequence and decodes them.
        /// </summary>
        /// <param name="s">The stream to read from; it is advanced past the sequence.</param>
        /// <param name="text">The decoded text (one or two UTF-16 code units), or "" at end of stream.</param>
        /// <returns>False if the stream was already at its end; otherwise true.</returns>
        /// <exception cref="InvalidDataException">A continuation byte is missing or malformed.</exception>
        static bool TryReadCodePoint(Stream s, out string text)
        {
            // As UTF-8 allows codepoints to span multiple bytes, reading a single byte as a character
            // will not always give the expected value.
            // Fortunately, the standard ASCII table is 7-bits long. The high bits of the first byte
            // determine the character size.
            int firstByte = s.ReadByte();
            if (firstByte < 0)
            {
                text = "";
                return false;
            }

            int continuationCount = 0;
            if ((firstByte >> 3) == 0x1E) // 11110xxx implies a 4-byte length character
            {
                continuationCount = 3;
            }
            else if ((firstByte >> 4) == 0xE) // 1110xxxx, 3-byte
            {
                continuationCount = 2;
            }
            else if ((firstByte >> 5) == 0x6) // 110xxxxx, 2-byte
            {
                continuationCount = 1;
            }

            byte[] charBytes = new byte[continuationCount + 1];
            charBytes[0] = (byte)firstByte;
            for (int i = 1; i <= continuationCount; i++)
            {
                int nextByte = s.ReadByte();
                // Continuation bytes are 10xxxxxx; -1 (end of stream) fails this check as well.
                if ((nextByte >> 6) != 2)
                {
                    throw new InvalidDataException("Character is not a valid UTF-8 code point!");
                }
                charBytes[i] = (byte)nextByte;
            }

            text = Encoding.UTF8.GetString(charBytes);
            return true;
        }

        /// <summary>
        /// Advances the stream past any whitespace, stopping in front of the first
        /// non-whitespace character (or at the end of the stream).
        /// </summary>
        /// <param name="s">The stream to advance.</param>
        static void SkipWhitespace(Stream s)
        {
            while (true)
            {
                long before = s.Position;
                string text;
                if (!TryReadCodePoint(s, out text))
                {
                    return;
                }
                if (!char.IsWhiteSpace(text[0]))
                {
                    // Un-read the character so the caller sees it.
                    s.Position = before;
                    return;
                }
            }
        }
    }
}