diff --git a/Assignment 1/Program.cs b/Assignment 1/Program.cs
index 31ff4c2..7a48992 100644
--- a/Assignment 1/Program.cs
+++ b/Assignment 1/Program.cs
@@ -1,4 +1,5 @@
using System;
+using System.Collections.Generic;
using System.IO;
using System.Text;
@@ -6,6 +7,27 @@ namespace Assignment_1
{
class Program
{
+ // Flags stored next to each value in the Symbols table.
+ enum VariableFlags
+ {
+ Empty = 0, // zero value; not referenced by any other code visible here
+ Reserved = 1 // Append refuses to modify a symbol carrying this flag
+ }
+
+ /// <summary>
+ /// This captures the end-point of each part of an expression (in the stream), to validate the syntax.
+ /// Optionally also captures the parsed string for each expression.
+ /// <para>
+ /// For this program, a word is considered to be any non-whitespace value bounded by whitespace or the array boundary.
+ /// </para>
+ /// </summary>
+
+ // Symbol table: maps a variable name to a (current string value, flags) pair.
+ // It starts out holding four whitespace constants, all marked Reserved so that Append will not modify them.
+ static Dictionary<string, Tuple<string, VariableFlags>> Symbols = new Dictionary<string, Tuple<string, VariableFlags>>
+ {
+     { "SPACE", new Tuple<string, VariableFlags>(" ", VariableFlags.Reserved) },
+     { "TAB", new Tuple<string, VariableFlags>("\t", VariableFlags.Reserved) },
+     { "NEWLINE", new Tuple<string, VariableFlags>("\n", VariableFlags.Reserved) },
+     { "CARRIAGE_RETURN", new Tuple<string, VariableFlags>("\r", VariableFlags.Reserved) }
+ };
static void Main(string[] args)
{
Console.WriteLine("┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓");
@@ -13,6 +35,9 @@ namespace Assignment_1
Console.WriteLine("┃ Submitted by Brychan Dempsey, 14299890 ┃");
Console.WriteLine("┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛");
MemoryStream sourceStream = new MemoryStream(1024); // Creates a memory stream to retain source while being interpreted.
+
+
+
Parser parser = new Parser();
bool dynamicInput = false;
// From https://stackoverflow.com/questions/3453220/how-to-detect-if-console-in-stdin-has-been-redirected
@@ -28,76 +53,181 @@ namespace Assignment_1
sourceStream.Position = 0;
dynamicInput = true;
}
- parser.FindProgram(sourceStream, dynamicInput);
+ parser.StartParsing(sourceStream, dynamicInput);
}
public class Parser
{
- public int FindProgram(Stream sourceStream, bool dynamicInput = false)
+ // Statement keywords. The member names match the case labels of the FindStatement switch
+ // that this change removes; no other code visible here references this enum yet.
+ public enum statements
{
- if (sourceStream.ReadByte() == '{')
- {
- FindStatement();
- }
- else return -1; // Could not find the start of the program
+ exit,
+ append,
+ list,
+ print,
+ printlength,
+ printwords,
+ printwordcount,
+ set,
+ reverse
+ }
+ // Entry point invoked by Main with the buffered source stream and the dynamic-input flag.
+ // The body is still empty: neither parameter is used yet.
+ public void StartParsing(Stream source, bool dynamicInput = false)
+ {
+
}
- void Command_Exit()
+ #region Function Handling
+ /// <summary>
+ /// Handles an append statement: reads an identifier, then a value, then the terminating ';', and finally
+ /// appends the value to the named variable in the symbol table.
+ /// </summary>
+ /// <param name="source">Seekable stream positioned at the identifier.</param>
+ /// <returns>True if the variable was updated; false on a parse error or an unknown or reserved variable.</returns>
+ bool Append(Stream source)
{
- Environment.Exit(0);
- }
-
- int FindStatement(Stream s)
- {
- string statement;
- if (dynamicInput)
+ string key;
+ long advance = FindIdentifier(source, out key);
+ if (advance < 0)
{
- Console.Write("Enter a \'Statement\': ");
- statement = Console.ReadLine();
+ // Error on finding object
+ return false;
}
else
{
- statement = GetNextLine(s);
+ source.Position = advance;
}
-
- int wordBound = 0;
- string nextWord = GetNextWord(statement, out wordBound);
- switch (nextWord)
+ string value;
+ advance = FindValue(source, out value);
+ if (advance < 0)
{
- case "append":
- FindIdentifier();
- FindExpression();
- break;
- case "list":
- break;
- case "exit":
- Environment.Exit(0);
- break;
- case "print":
- break;
- case "printlength":
- break;
- case "printwords":
- break;
- case "printwordcount":
- break;
- case "set":
- break;
- case "reverse":
- break;
- default:
- break;
+ // Error on parsing value
+ return false;
}
-
- // Look for further elements
- return 1;
+ else
+ {
+     source.Position = advance;
+ }
+ string eol;
+ FindNextWord(source, out eol);
+ // An empty word (e.g. the stream is sitting on whitespace) must be rejected before indexing into it.
+ if (string.IsNullOrEmpty(eol) || eol[0] != ';')
+ {
+     // Expected end-of-statement/end-of-line (;)
+     return false;
+ }
+ Tuple<string, VariableFlags> current;
+ if (!Symbols.TryGetValue(key, out current))
+ {
+     // Unknown variable: report failure rather than letting the indexer throw KeyNotFoundException
+     return false;
+ }
+ if (current.Item2 == VariableFlags.Reserved)
+ {
+     // Can't assign to reserved items
+     return false;
+ }
+ // Tuples are immutable, so store a new pair with the extended value and the same flags.
+ Symbols[key] = new Tuple<string, VariableFlags>(current.Item1 + value, current.Item2);
+ return true;
}
- int FindIdentifier(Stream s)
+ #endregion
+ #region Data Handling
+ // Data Handling
+ /// <summary>
+ /// Parses an expression starting at the current stream position: one value, optionally followed by
+ /// further values joined with '+' (whitespace around the '+' is allowed). The values are concatenated.
+ /// </summary>
+ /// <param name="s">Seekable stream positioned at the start of the expression.</param>
+ /// <param name="expression">The concatenated values, or "" if any value could not be parsed.</param>
+ /// <returns>
+ /// The position just past the last value (the stream is left there), or the negative result
+ /// reported by FindValue when a value could not be parsed.
+ /// </returns>
+ long FindExpression(Stream s, out string expression)
{
-
+     // must contain at least one value
+     expression = "";
+     string result;
+     long exprEnd = FindValue(s, out result);
+     while (exprEnd >= 0)
+     {
+         // Look past any whitespace following the value; only a '+' continues the expression.
+         s.Position = exprEnd;
+         int next = s.ReadByte();
+         while (next > -1 && char.IsWhiteSpace((char)next))
+         {
+             next = s.ReadByte();
+         }
+         if (next != '+')
+         {
+             // End of the expression: rewind so the lookahead byte is not consumed.
+             s.Position = exprEnd;
+             expression = result;
+             break;
+         }
+         // The operand after '+' is itself a value (literal or variable), not a raw word.
+         string operand;
+         exprEnd = FindValue(s, out operand);
+         result += operand;
+     }
+     return exprEnd;
}
+
+
+ // Most atomic unit is 'value':
+ /// <summary>
+ /// Finds the next value in the stream: either a double-quoted literal or the name of a defined variable.
+ /// </summary>
+ /// <param name="s">Seekable stream; leading whitespace before the value is skipped.</param>
+ /// <param name="returnedValue">The literal text or the variable's current value; "" on failure.</param>
+ /// <returns>The end position reported by FindLiteral or FindIdentifier, or -1 if the identifier is not defined.</returns>
+ long FindValue(Stream s, out string returnedValue)
+ {
+     // Skip leading whitespace here, keeping hold of the first significant byte so it can be inspected.
+     int first = s.ReadByte();
+     while (first > -1 && char.IsWhiteSpace((char)first))
+     {
+         first = s.ReadByte();
+     }
+     if (first == '\"')
+     {
+         // The stream now sits just past the opening quote, which is where FindLiteral starts scanning.
+         return FindLiteral(s, out returnedValue);
+     }
+     if (first > -1)
+     {
+         // Not a literal: un-read the byte so the identifier keeps its first character.
+         s.Position--;
+     }
+     string keyValue;
+     long t = FindIdentifier(s, out keyValue);
+     if (t < 0 || !Symbols.ContainsKey(keyValue))
+     {
+         // Unknown identifier: report failure instead of letting the indexer throw KeyNotFoundException.
+         returnedValue = "";
+         return -1;
+     }
+     returnedValue = Symbols[keyValue].Item1;
+     return t;
+ }
+
+ /// <summary>
+ /// Reads the next word from the stream and checks that it names an entry in the symbol table.
+ /// </summary>
+ /// <param name="s">Stream positioned at the start of the identifier.</param>
+ /// <param name="returnedKey">The symbol-table key that was read, or "" if no such symbol is defined.</param>
+ /// <returns>The position reported by FindNextWord for the word, or -1 if the name is not defined.</returns>
+ long FindIdentifier(Stream s, out string returnedKey)
+ {
+     string identifier;
+     long wordEnd = FindNextWord(s, out identifier);
+     // Lookup the value in the symbol table
+     if (!Symbols.ContainsKey(identifier))
+     {
+         Console.WriteLine("Could not find a defined variable with the name {0}", identifier);
+         returnedKey = "";
+         return -1;
+     }
+     // Hand back the key itself: callers index Symbols with it, so returning the stored value would break them.
+     returnedKey = identifier;
+     return wordEnd;
+ }
+
+ /// <summary>
+ /// Reads a string literal whose opening quote has already been consumed, scanning for the closing quote.
+ /// A quote preceded by an unescaped backslash does not end the literal. Escape sequences are not decoded:
+ /// the backslashes stay in the returned text.
+ /// </summary>
+ /// <param name="s">Stream positioned just past the opening quote.</param>
+ /// <param name="returnedLiteral">The text between the quotes, or "" if the scan reported a failure.</param>
+ /// <returns>The value returned by FindNextOccurance for the closing quote.</returns>
+ long FindLiteral(Stream s, out string returnedLiteral)
+ {
+     // Is a literal. Now we must parse until we find the end of the literal
+     // 'escaped' is true when the previous character was a backslash that was not itself escaped,
+     // so "\\" followed by a quote correctly ends the literal while "\"" does not.
+     bool escaped = false;
+     string resultLiteral;
+     long resultPosition = FindNextOccurance(s, (c, stream) =>
+     {
+         if (escaped)
+         {
+             // This character is the target of an escape, whatever it is.
+             escaped = false;
+             return false;
+         }
+         if (c == '\\')
+         {
+             escaped = true;
+             return false;
+         }
+         return c == '\"';
+     }, out resultLiteral);
+     returnedLiteral = resultPosition > -1 ? resultLiteral : "";
+     return resultPosition;
+ }
+ #endregion
}
-
+
///
@@ -107,52 +237,17 @@ namespace Assignment_1
///
static string GetNextLine(Stream s)
{
- long start = s.Position;
- StringBuilder sb = new StringBuilder();
- bool newLineFound = false;
- while (!newLineFound)
- {
- // As UTF-8 allows codepoints to span multiple bytes, reading a single byte as a character will not always give the expected
- // value.
- // Fortunately, the standard ASCII table is 7-bits long. The 8th bit is used to determine the character size
- int readAmount = 0;
- int firstChar = s.ReadByte();
- if ((firstChar >> 3) == 0x1E) // 11110xxx implies a 4-byte length character
- {
- readAmount = 3;
- }
- else if((firstChar >> 4) == 0xE) // 1110xxxx, 3-byte
- {
- readAmount = 2;
- }
- else if ((firstChar >> 5) == 0x6) // 110xxxxx, 2-byte
- {
- readAmount = 1;
- }
-
- byte[] charBytes = new byte[readAmount + 1];
- charBytes[0] = (byte)firstChar;
- for (int i = 1; i < readAmount; i++)
- {
- int nextChar = s.ReadByte();
- if (nextChar >> 6 != 2) throw new Exception("Character is not a valid UTF-8 code point!");
- charBytes[i] = (byte)nextChar;
- }
- string converted = Encoding.UTF8.GetString(charBytes);
- if (converted == "\r" || converted == "\n")
- {
- if (s.ReadByte() != '\n') s.Position--; // Return the position if the next character isn't a new line
- newLineFound = true;
- }
- else
- {
- sb.Append(converted);
- }
- }
- return sb.ToString();
+     // Reads the text up to (not including) the next '\n'. A '\r' before the '\n' is kept in the result.
+     string nextLine;
+     long lineEnd = FindNextOccurance(s, '\n', out nextLine);
+     // FindNextOccurance rewinds the stream to where it started, so the line has to be consumed here;
+     // otherwise every call would return the same line again.
+     if (lineEnd >= 0)
+     {
+         s.Position = lineEnd;
+     }
+     return nextLine;
}
-
- static string GetNextWord(string s, out int wordEndPos)
+ ///
+ /// Finds the next word in the string
+ ///
+ ///
+ ///
+ /// A value <0 if an error occurred, else the position of the end of the word
+ static long FindNextWord(string s, out string nextWord)
{
// remove whitespace from the start
int wordStart = 0;
@@ -170,8 +265,110 @@ namespace Assignment_1
if (char.IsWhiteSpace(s[i])) break;
wordEnd = i;
}
- wordEndPos = wordEnd;
- return s.Substring(wordStart, wordEnd);
+
+ nextWord = s.Substring(wordStart, wordEnd);
+ return wordEnd;
+ }
+ /// <summary>
+ /// Finds the end-boundary of the next word in the stream
+ /// </summary>
+ /// <param name="s">Stream to read from, starting at its current position. Leading whitespace is not skipped.</param>
+ /// <param name="nextWord">The characters read before the first whitespace character (may be empty).</param>
+ /// <returns>The value reported by FindNextOccurance: the position just past the terminating whitespace character.</returns>
+ static long FindNextWord(Stream s, out string nextWord)
+ {
+     // The lambda's stream parameter gets its own name: reusing 's' is error CS0136 before C# 8.
+     return FindNextOccurance(s, (c, stream) => char.IsWhiteSpace(c), out nextWord);
+ }
+
+
+ /// <summary>
+ /// Finds and returns the position of the next occurrence of the Func returning true.
+ /// </summary>
+ /// <param name="s">Seekable stream scanned from its current position; the position is restored before returning.</param>
+ /// <param name="p">Predicate called with each decoded character and the stream; returning true ends the scan.</param>
+ /// <param name="result">The characters read before the matching character (the match itself is excluded).</param>
+ /// <returns>The stream position just past the matching character, or -1 if the stream ended without a match.</returns>
+ static long FindNextOccurance(Stream s, Func<char, Stream, bool> p, out string result)
+ {
+     long start = s.Position;
+     StringBuilder sb = new StringBuilder();
+     long newPosition = -1;
+
+     // GetChar cannot signal end-of-stream through its char return value, so the bound is checked here.
+     // Without it a missing match would keep reading past the end and never terminate.
+     while (s.Position < s.Length)
+     {
+         char nextChar = GetChar(s);
+         if (p(nextChar, s))
+         {
+             newPosition = s.Position;
+             break;
+         }
+         sb.Append(nextChar);
+     }
+
+     result = sb.ToString();
+     // Callers decide whether to advance: always hand the stream back where it was found.
+     s.Position = start;
+     return newPosition;
+ }
+
+ /// <summary>
+ /// Finds the next position of the character
+ /// </summary>
+ /// <param name="s">Stream to search, starting at its current position.</param>
+ /// <param name="c">The character to search for.</param>
+ /// <param name="result">Captures the string read in searching for the character</param>
+ /// <returns>The value reported by the predicate overload of FindNextOccurance for the first character equal to c.</returns>
+ static long FindNextOccurance(Stream s, char c, out string result)
+ {
+     // The lambda's stream parameter gets its own name: reusing 's' is error CS0136 before C# 8.
+     return FindNextOccurance(s, (streamChar, stream) => streamChar == c, out result);
+ }
+
+ /// <summary>
+ /// Reads one UTF-8 encoded character from the stream and returns it as a char.
+ /// </summary>
+ /// <param name="s">Stream positioned on the lead byte of a character.</param>
+ /// <returns>
+ /// The first UTF-16 code unit of the decoded text. For a 4-byte sequence this is only the first half of
+ /// the surrogate pair, because a single char cannot hold the whole code point.
+ /// </returns>
+ /// <exception cref="EndOfStreamException">The stream has no more bytes to read.</exception>
+ /// <exception cref="InvalidDataException">A continuation byte is missing or malformed.</exception>
+ static char GetChar(Stream s)
+ {
+     // As UTF-8 allows codepoints to span multiple bytes, reading a single byte as a character will not always give the expected
+     // value.
+     // Fortunately, the standard ASCII table is 7-bits long. The 8th bit is used to determine the character size
+     int firstChar = s.ReadByte();
+     if (firstChar < 0)
+     {
+         // ReadByte returns -1 at end of stream; casting that to a byte would fabricate a 0xFF character.
+         throw new EndOfStreamException("Attempted to read a character past the end of the stream.");
+     }
+
+     int readAmount = 0; // number of continuation bytes that follow the lead byte
+     if ((firstChar >> 3) == 0x1E) // 11110xxx implies a 4-byte length character
+     {
+         readAmount = 3;
+     }
+     else if ((firstChar >> 4) == 0xE) // 1110xxxx, 3-byte
+     {
+         readAmount = 2;
+     }
+     else if ((firstChar >> 5) == 0x6) // 110xxxxx, 2-byte
+     {
+         readAmount = 1;
+     }
+
+     byte[] charBytes = new byte[readAmount + 1];
+     charBytes[0] = (byte)firstChar;
+     // Inclusive bound: indices 1..readAmount are the continuation bytes. A strict '<' skips the last one.
+     for (int i = 1; i <= readAmount; i++)
+     {
+         int nextChar = s.ReadByte();
+         if (nextChar >> 6 != 2)
+         {
+             throw new InvalidDataException("Character is not a valid UTF-8 code point!");
+         }
+         charBytes[i] = (byte)nextChar;
+     }
+     string converted = Encoding.UTF8.GetString(charBytes);
+     return converted[0];
+ }
+
+ /// <summary>
+ /// Advances the stream past any whitespace, leaving it positioned on the first non-whitespace byte
+ /// (or at the end of the stream if there is none).
+ /// </summary>
+ /// <param name="s">Seekable stream to advance.</param>
+ static void SkipWhitespace(Stream s)
+ {
+     int readByte = s.ReadByte();
+     while (readByte > -1 && char.IsWhiteSpace((char)readByte))
+     {
+         readByte = s.ReadByte();
+     }
+     if (readByte > -1)
+     {
+         // The loop had to read the first non-whitespace byte to recognise it; un-read it so the
+         // caller sees it instead of the byte after it.
+         s.Position--;
+     }
}
}
}