401 lines
14 KiB
401 lines
14 KiB
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace Assignment_1
class Program
/// <summary>
/// Bit flags stored alongside each value in the symbol table.
/// The values are distinct bits so that they can be combined with <c>|</c>, as the built-in symbols do.
/// </summary>
[Flags]
enum VariableFlags
{
    /// <summary>No flags set.</summary>
    Empty = 0,

    /// <summary>The symbol is built in; the append operation refuses to modify it.</summary>
    Reserved = 1,

    /// <summary>
    /// Set on the built-in whitespace symbols.
    /// NOTE(review): presumably suppresses printing of the value; no visible code reads this flag - confirm.
    /// </summary>
    NoPrint = 2
}
/// <summary>
/// Symbol table: maps a variable name to a tuple of its string value and its <see cref="VariableFlags"/>.
/// Pre-populated with SPACE, TAB, NEWLINE and CARRIAGE_RETURN, each flagged Reserved | NoPrint.
/// </summary>
/// <remarks>
/// NOTE(review): the summary previously attached here read "This captures the end-point of each part of an
/// expression (in the stream), to validate the syntax. Optionally also captures the parsed string for each
/// expression. For this program, a word is considered to be any non-whitespace value bounded by whitespace
/// or the array boundary." That text does not describe this field; presumably it belonged to a declaration
/// that is no longer here - confirm.
/// NOTE(review): the braces of the collection initializer are not visible in this view.
/// </remarks>
static Dictionary<string, Tuple<string, VariableFlags>> Symbols = new Dictionary<string, Tuple<string, VariableFlags>>
{ "SPACE", new Tuple<string, VariableFlags>(" ", VariableFlags.Reserved | VariableFlags.NoPrint) },
{ "TAB", new Tuple<string, VariableFlags>("\t", VariableFlags.Reserved | VariableFlags.NoPrint) },
{ "NEWLINE", new Tuple<string, VariableFlags>("\n", VariableFlags.Reserved | VariableFlags.NoPrint) },
{ "CARRIAGE_RETURN", new Tuple<string, VariableFlags>("\r", VariableFlags.Reserved | VariableFlags.NoPrint) }
/// <summary>
/// Program entry point: prints the assignment banner, creates the in-memory source buffer and a
/// <c>Parser</c>, then hands the buffer to <c>Parser.StartParsing</c>.
/// </summary>
/// <param name="args">Command-line arguments; not read by the visible code.</param>
static void Main(string[] args)
Console.WriteLine("┃ 159.341 2021 Semester 1, Assignment 1 ┃");
Console.WriteLine("┃ Submitted by Brychan Dempsey, 14299890 ┃");
MemoryStream sourceStream = new MemoryStream(1024); // Creates a memory stream to retain source while being interpreted.
Parser parser = new Parser();
bool dynamicInput = false;
// From https://stackoverflow.com/questions/3453220/how-to-detect-if-console-in-stdin-has-been-redirected
// Reading from pipes is equivalent to reading user input, though the input is redirected
// NOTE(review): no braces or 'else' are visible around the statements below in this view, so which of them
// belong to the redirected branch cannot be determined here. Nothing visible copies standard input into
// sourceStream before parsing starts - confirm against the full file.
if (Console.IsInputRedirected)
sourceStream.Position = 0;
sourceStream.Position = 0;
dynamicInput = true;
// Parsing always starts from this buffer; dynamicInput is forwarded unchanged to the parser.
parser.StartParsing(sourceStream, dynamicInput);
public class Parser
/// <summary>
/// Enumeration declared inside the parser.
/// NOTE(review): presumably lists the statement kinds the parser recognises; no members are visible in this view - confirm.
/// </summary>
public enum statements
/// <summary>
/// Parser entry point; <c>Main</c> calls it with the in-memory source stream and its dynamic-input flag.
/// NOTE(review): no body is visible in this view, so the parsing behaviour cannot be described here.
/// </summary>
/// <param name="source">Stream handed over by the caller; presumably holds the source text to interpret - confirm.</param>
/// <param name="dynamicInput">Flag forwarded from <c>Main</c>; defaults to false.</param>
public void StartParsing(Stream source, bool dynamicInput = false)
#region Function Handling
/// <summary>
/// Handles an append statement: reads an identifier, a value and a terminating ';' from the stream and
/// appends the value to the identifier's current contents in the symbol table.
/// </summary>
/// <param name="source">Seekable stream positioned at the identifier; its Position is advanced past the identifier and the value.</param>
/// <returns>True when the symbol was updated; false on any syntax error, unknown symbol or reserved symbol.</returns>
bool Append(Stream source)
{
    string key;
    long advance = FindIdentifier(source, out key);
    if (advance < 0)
    {
        // Error on finding object
        return false;
    }
    source.Position = advance;

    string value;
    advance = FindValue(source, out value);
    if (advance < 0)
    {
        // Error on parsing value
        return false;
    }
    source.Position = advance;

    string eol;
    FindNextWord(source, out eol);
    if (string.IsNullOrEmpty(eol) || eol[0] != ';')
    {
        // Expected end-of-statement/end-of-line (;). An empty word is rejected here rather than
        // letting eol[0] throw.
        return false;
    }

    Tuple<string, VariableFlags> current;
    if (!Symbols.TryGetValue(key, out current))
    {
        // Unknown symbol: report failure instead of throwing KeyNotFoundException.
        return false;
    }

    // Test the Reserved bit rather than comparing for equality: the built-in symbols carry
    // Reserved | NoPrint, which an equality check against Reserved would let through.
    if ((current.Item2 & VariableFlags.Reserved) != 0)
    {
        // Can't assign to reserved items
        return false;
    }

    Symbols[key] = new Tuple<string, VariableFlags>(current.Item1 + value, current.Item2);
    return true;
}
/// <summary>
/// Prints the symbol table to the console as a three-column box-drawn table (Symbol, Value, Flags).
/// Flags are rendered as an 8-digit binary number.
/// </summary>
/// <returns>Always true.</returns>
bool List()
{
    const int symbolWidth = 15;
    const int valueWidth = 25;
    const int flagsWidth = 9;

    // Alignment is the component after a comma ({0,-15}); the original "{0:-15}" passed "-15" as a
    // format specifier, which strings ignore, so no column was ever padded.
    const string rowFormat = "│{0,-15}│{1,-25}│{2,9}│";

    string symbolRule = new string('─', symbolWidth);
    string valueRule = new string('─', valueWidth);
    string flagsRule = new string('─', flagsWidth);

    // The top border is built from the same column widths as the other rules so that it lines up.
    Console.WriteLine("┌" + symbolRule + "┬" + valueRule + "┬" + flagsRule + "┐");
    Console.WriteLine(rowFormat, "Symbol", "Value", "Flags");

    // A divider precedes every row and the closing border follows the last one, so the table is
    // closed correctly for any number of symbols, including none.
    // NOTE(review): values containing control characters (the built-in TAB/NEWLINE symbols) will
    // still disturb the layout; presumably the NoPrint flag is meant to address this - confirm.
    foreach (var item in Symbols)
    {
        Console.WriteLine("├" + symbolRule + "┼" + valueRule + "┼" + flagsRule + "┤");
        Console.WriteLine(rowFormat, item.Key, item.Value.Item1, Convert.ToString((byte)item.Value.Item2, 2).PadLeft(8, '0'));
    }

    Console.WriteLine("└" + symbolRule + "┴" + valueRule + "┴" + flagsRule + "┘");
    return true;
}
#region Data Handling
// Data Handling
/// <summary>
/// Parses an expression starting at the current position of the stream: a first value is read with
/// FindValue, then further words are read in a loop and each is compared against "+".
/// </summary>
/// <param name="s">Stream to read from; its Position is reassigned from the word-end positions returned by FindNextWord.</param>
/// <param name="expression">Receives the accumulated result string.</param>
/// <returns>The most recent word-end position returned by FindNextWord.</returns>
/// <remarks>
/// NOTE(review): braces and any 'else'/'break' lines are not visible in this view, so the extent of the
/// 'if' and the exit condition of the 'while (true)' loop cannot be determined here - confirm against the full file.
/// </remarks>
long FindExpression(Stream s, out string expression)
// must contain at least one value
string result;
// The end position of the first value is overwritten below before it is used.
long wordEnd = FindValue(s, out result);
while (true)
string nextWord;
wordEnd = FindNextWord(s, out nextWord);
// A positive end position together with the word "+" marks a concatenation operator.
if (wordEnd > 0 && nextWord == "+")
s.Position = wordEnd;
s.Position = wordEnd;
wordEnd = FindNextWord(s, out nextWord);
// The raw word text is appended to the result.
result += nextWord;
expression = result;
return wordEnd;
// Most atomic unit is 'value':
/// <summary>
/// Finds the next value in the stream. A value is either a string literal introduced by a double quote,
/// or an identifier whose current contents are looked up in the symbol table.
/// </summary>
/// <param name="s">Seekable stream positioned at the first character of the value. Its Position is restored before returning.</param>
/// <param name="returnedValue">Receives the literal text or the symbol's value; empty on failure.</param>
/// <returns>The stream position just past the value, or a negative number on failure.</returns>
long FindValue(Stream s, out string returnedValue)
{
    long start = s.Position;
    int firstByte = s.ReadByte();

    if (firstByte == '\"')
    {
        // The opening quote is deliberately consumed so the literal scan starts on the first
        // character inside the quotes.
        long literalEnd = FindLiteral(s, out returnedValue);
        s.Position = start;
        return literalEnd;
    }

    // Not a literal: put the peeked byte back, otherwise the identifier would lose its first character.
    s.Position = start;

    string keyValue;
    long identifierEnd = FindIdentifier(s, out keyValue);

    Tuple<string, VariableFlags> symbol;
    if (identifierEnd < 0 || keyValue == null || !Symbols.TryGetValue(keyValue, out symbol))
    {
        // Report failure instead of throwing KeyNotFoundException on an unknown key.
        returnedValue = "";
        return -1;
    }

    returnedValue = symbol.Item1;
    return identifierEnd;
}
/// <summary>
/// Reads the next word from the stream and checks that it names a symbol defined in the symbol table.
/// </summary>
/// <param name="s">Stream positioned at the identifier.</param>
/// <param name="returnedKey">Receives the identifier itself (the symbol-table key), or an empty string when it is not defined.</param>
/// <returns>The end position reported by FindNextWord, or -1 when the identifier is not defined.</returns>
long FindIdentifier(Stream s, out string returnedKey)
{
    string identifier;
    long wordEnd = FindNextWord(s, out identifier);

    // Lookup the value in the symbol table. A membership test replaces the original try/catch so an
    // expected failure does not go through exception handling.
    if (identifier == null || !Symbols.ContainsKey(identifier))
    {
        Console.WriteLine("Could not find a defined variable with the name {0}", identifier);
        returnedKey = "";
        return -1;
    }

    // Callers index the symbol table with this result, so it must be the key, not the stored value.
    returnedKey = identifier;
    return wordEnd;
}
/// <summary>
/// Scans a string literal. The stream is expected to be positioned just past the opening double quote;
/// scanning stops at the first double quote that is not escaped with a backslash.
/// </summary>
/// <param name="s">Stream positioned on the first character inside the quotes.</param>
/// <param name="returnedLiteral">
/// Receives the raw text between the quotes (escape sequences are kept as written, not decoded),
/// or an empty string on failure.
/// </param>
/// <returns>The position reported by FindNextOccurance for the closing quote; a negative value means failure.</returns>
long FindLiteral(Stream s, out string returnedLiteral)
{
    // True while the previous character was a backslash that has not yet escaped anything.
    // Tracking this state handles both \" (escaped quote) and \\ (escaped backslash). The original
    // stepped the stream back one byte and re-read the quote itself, so it never saw the backslash.
    bool escaped = false;

    string resultLiteral;
    long resultPosition = FindNextOccurance(s, (c, stream) =>
    {
        if (escaped)
        {
            // This character is consumed by the preceding backslash, whatever it is.
            escaped = false;
            return false;
        }
        if (c == '\\')
        {
            escaped = true;
            return false;
        }
        return c == '\"';
    }, out resultLiteral);

    returnedLiteral = resultPosition > -1 ? resultLiteral : "";
    return resultPosition;
}
/// <summary>
/// Reads the stream as UTF-8 text up to the next '\n' or '\r\n'. The terminator is consumed but is
/// not part of the returned string.
/// </summary>
/// <param name="s">Seekable stream to read from; its Position is moved past the line terminator.</param>
/// <returns>The text of the line without its terminator.</returns>
static string GetNextLine(Stream s)
{
    string nextLine;
    long lineEnd = FindNextOccurance(s, '\n', out nextLine);

    // FindNextOccurance restores the stream position, so the line has to be consumed explicitly;
    // otherwise every call would return the same line. A negative result follows the file's
    // "negative means not found" convention and is treated as running to the end of the stream.
    s.Position = lineEnd >= 0 ? lineEnd : s.Length;

    // Drop the '\r' of a '\r\n' terminator so it is excluded as documented.
    if (nextLine.EndsWith("\r", StringComparison.Ordinal))
    {
        nextLine = nextLine.Substring(0, nextLine.Length - 1);
    }
    return nextLine;
}
/// <summary>
/// Finds the next word in the string. A word is a run of non-whitespace characters; leading
/// whitespace is skipped.
/// </summary>
/// <param name="s">Text to search; may be null or empty.</param>
/// <param name="nextWord">Receives the word, or an empty string when there is none.</param>
/// <returns>
/// The index just past the last character of the word, or -1 when the string is null, empty or
/// contains only whitespace.
/// </returns>
static long FindNextWord(string s, out string nextWord)
{
    nextWord = "";
    if (string.IsNullOrEmpty(s))
    {
        return -1;
    }

    // remove whitespace from the start
    int wordStart = 0;
    while (wordStart < s.Length && char.IsWhiteSpace(s[wordStart]))
    {
        wordStart++;
    }
    if (wordStart == s.Length)
    {
        return -1;
    }

    int wordEnd = wordStart;
    while (wordEnd < s.Length && !char.IsWhiteSpace(s[wordEnd]))
    {
        wordEnd++;
    }

    // Substring takes (start, length), not (start, end).
    nextWord = s.Substring(wordStart, wordEnd - wordStart);
    return wordEnd;
}
/// <summary>
/// Locates the end of the next word in the stream by scanning forward to the first whitespace character.
/// Leading whitespace is not skipped, and the stream position is left where it was on entry.
/// </summary>
/// <param name="s">Stream to scan from its current position.</param>
/// <param name="nextWord">Receives the text captured by the scan.</param>
/// <returns>The position reported by FindNextOccurance for the whitespace delimiter.</returns>
static long FindNextWord(Stream s, out string nextWord)
{
    Func<char, Stream, bool> isDelimiter = (character, stream) => char.IsWhiteSpace(character);
    long delimiterPosition = FindNextOccurance(s, isDelimiter, out nextWord);
    return delimiterPosition;
}
/// <summary>
/// Scans forward from the current stream position, one decoded character at a time, until the
/// predicate returns true or the stream ends.
/// </summary>
/// <param name="s">Seekable stream to scan. Its Position is restored to the starting value before returning.</param>
/// <param name="p">Predicate called with each character and the stream; returning true ends the scan.</param>
/// <param name="result">Receives the characters read before the match (the matching character is excluded).</param>
/// <returns>
/// The stream position just past the matching character, or -1 when the end of the stream was reached
/// without a match. In the -1 case <paramref name="result"/> still holds the text that was read.
/// </returns>
/// <exception cref="ArgumentNullException">The stream or the predicate is null.</exception>
static long FindNextOccurance(Stream s, Func<char, Stream, bool> p, out string result)
{
    if (s == null)
    {
        throw new ArgumentNullException(nameof(s));
    }
    if (p == null)
    {
        throw new ArgumentNullException(nameof(p));
    }

    long start = s.Position;
    StringBuilder sb = new StringBuilder();
    long matchEnd = -1;

    // Stop at the end of the stream: without this bound the loop never terminates when no
    // character satisfies the predicate.
    while (s.Position < s.Length)
    {
        char nextChar = GetChar(s);
        if (p(nextChar, s))
        {
            matchEnd = s.Position;
            break;
        }

        // Accumulate the text read so far; callers rely on it as the captured word/line/literal.
        sb.Append(nextChar);
    }

    result = sb.ToString();
    s.Position = start;
    return matchEnd;
}
/// <summary>
/// Locates the next occurrence of a specific character in the stream.
/// </summary>
/// <param name="s">Stream to scan from its current position.</param>
/// <param name="c">Character to search for.</param>
/// <param name="result">Captures the string read in searching for the character.</param>
/// <returns>The position reported by the predicate-based overload for the matching character.</returns>
static long FindNextOccurance(Stream s, char c, out string result)
{
    Func<char, Stream, bool> matchesTarget = (candidate, stream) => candidate == c;
    long position = FindNextOccurance(s, matchesTarget, out result);
    return position;
}
/// <summary>
/// Reads one UTF-8 encoded character from the stream, consuming every byte of its encoding.
/// </summary>
/// <param name="s">Stream positioned at the first byte of a character.</param>
/// <returns>
/// The decoded character. A 4-byte sequence decodes to a surrogate pair, of which only the first
/// (high) surrogate is returned because the return type is a single char.
/// </returns>
/// <exception cref="EndOfStreamException">The stream has no more bytes.</exception>
/// <exception cref="InvalidDataException">A continuation byte is missing or malformed.</exception>
static char GetChar(Stream s)
{
    // As UTF-8 allows codepoints to span multiple bytes, reading a single byte as a character will
    // not always give the expected value. ASCII occupies 7 bits; the high bits of the first byte
    // encode how many continuation bytes follow.
    int firstByte = s.ReadByte();
    if (firstByte < 0)
    {
        // Previously -1 was cast to 0xFF and silently decoded as U+FFFD.
        throw new EndOfStreamException("Attempted to read a character past the end of the stream.");
    }

    int continuationCount = 0;
    if ((firstByte >> 3) == 0x1E) // 11110xxx implies a 4-byte length character
    {
        continuationCount = 3;
    }
    else if ((firstByte >> 4) == 0xE) // 1110xxxx, 3-byte
    {
        continuationCount = 2;
    }
    else if ((firstByte >> 5) == 0x6) // 110xxxxx, 2-byte
    {
        continuationCount = 1;
    }

    byte[] charBytes = new byte[continuationCount + 1];
    charBytes[0] = (byte)firstByte;

    // Inclusive upper bound: all continuation bytes must be read. The original "i < readAmount"
    // stopped one short, leaving the last byte unread in the stream and zero in the buffer.
    for (int i = 1; i <= continuationCount; i++)
    {
        int nextByte = s.ReadByte();
        if (nextByte >> 6 != 2) // continuation bytes are 10xxxxxx; -1 (end of stream) also fails here
        {
            throw new InvalidDataException("Character is not a valid UTF-8 code point!");
        }
        charBytes[i] = (byte)nextByte;
    }

    string converted = Encoding.UTF8.GetString(charBytes);
    return converted[0];
}
/// <summary>
/// Advances the stream past any leading whitespace bytes, leaving it positioned on the first
/// non-whitespace byte (or at the end of the stream when only whitespace remains).
/// </summary>
/// <param name="s">Seekable stream to advance.</param>
static void SkipWhitespace(Stream s)
{
    int readByte = s.ReadByte();
    while (readByte > -1 && char.IsWhiteSpace((char)readByte))
    {
        readByte = s.ReadByte();
    }

    // The loop stops after reading one byte too many; step back so the first non-whitespace
    // byte is not swallowed. Nothing to undo when the loop ended on end-of-stream (-1).
    if (readByte > -1)
    {
        s.Position--;
    }
}