Restructured most of the project, clarified implementations
This commit is contained in:
parent
6d73e88105
commit
3905b46e75
@ -1,4 +1,5 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Text;
|
||||
|
||||
@ -6,6 +7,27 @@ namespace Assignment_1
|
||||
{
|
||||
class Program
|
||||
{
|
||||
// Markers stored next to each value in the symbol table.
enum VariableFlags
{
    // No special treatment.
    Empty = 0x0,

    // The entry is predefined; the append handler refuses to assign to it.
    Reserved = 0x1
}
|
||||
|
||||
/// <summary>
|
||||
/// This captures the end-point of each part of an expression (in the stream), to validate the syntax
|
||||
/// Optionally also captures the parsed string for each expression.
|
||||
///
|
||||
/// For this program, a word is considered to be any non-whitespace value bounded by whitespace or the array boundary.
|
||||
///
|
||||
/// </summary>
|
||||
|
||||
// Symbol table: maps a variable name to its current text and its flags.
// It starts out holding only the predefined whitespace constants, all marked Reserved.
static Dictionary<string, Tuple<string, VariableFlags>> Symbols = new Dictionary<string, Tuple<string, VariableFlags>>
{
    { "SPACE", Tuple.Create(" ", VariableFlags.Reserved) },
    { "TAB", Tuple.Create("\t", VariableFlags.Reserved) },
    { "NEWLINE", Tuple.Create("\n", VariableFlags.Reserved) },
    { "CARRIAGE_RETURN", Tuple.Create("\r", VariableFlags.Reserved) }
};
|
||||
static void Main(string[] args)
|
||||
{
|
||||
Console.WriteLine("┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓");
|
||||
@ -13,6 +35,9 @@ namespace Assignment_1
|
||||
Console.WriteLine("┃ Submitted by Brychan Dempsey, 14299890 ┃");
|
||||
Console.WriteLine("┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛");
|
||||
MemoryStream sourceStream = new MemoryStream(1024); // Creates a memory stream to retain source while being interpreted.
|
||||
|
||||
|
||||
|
||||
Parser parser = new Parser();
|
||||
bool dynamicInput = false;
|
||||
// From https://stackoverflow.com/questions/3453220/how-to-detect-if-console-in-stdin-has-been-redirected
|
||||
@ -28,76 +53,181 @@ namespace Assignment_1
|
||||
sourceStream.Position = 0;
|
||||
dynamicInput = true;
|
||||
}
|
||||
parser.FindProgram(sourceStream, dynamicInput);
|
||||
parser.StartParsing(sourceStream, dynamicInput);
|
||||
}
|
||||
public class Parser
|
||||
{
|
||||
public int FindProgram(Stream sourceStream, bool dynamicInput = false)
|
||||
public enum statements
|
||||
{
|
||||
if (sourceStream.ReadByte() == '{')
|
||||
{
|
||||
FindStatement();
|
||||
}
|
||||
else return -1; // Could not find the start of the program
|
||||
exit,
|
||||
append,
|
||||
list,
|
||||
print,
|
||||
printlength,
|
||||
printwords,
|
||||
printwordcount,
|
||||
set,
|
||||
reverse
|
||||
}
|
||||
/// <summary>
/// Entry point invoked by Main once the source has been loaded.
/// The body is currently empty: neither <paramref name="source"/> nor
/// <paramref name="dynamicInput"/> is read yet.
/// </summary>
/// <param name="source">Stream holding the program text.</param>
/// <param name="dynamicInput">Flag forwarded from Main; presumably marks interactive input - TODO confirm once implemented.</param>
public void StartParsing(Stream source, bool dynamicInput = false)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
void Command_Exit()
|
||||
#region Function Handling
|
||||
bool Append(Stream source)
|
||||
{
|
||||
Environment.Exit(0);
|
||||
}
|
||||
|
||||
int FindStatement(Stream s)
|
||||
{
|
||||
string statement;
|
||||
if (dynamicInput)
|
||||
string key;
|
||||
long advance = FindIdentifier(source, out key);
|
||||
if (advance < 0)
|
||||
{
|
||||
Console.Write("Enter a \'Statement\': ");
|
||||
statement = Console.ReadLine();
|
||||
// Error on finding object
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
statement = GetNextLine(s);
|
||||
source.Position = advance;
|
||||
}
|
||||
|
||||
int wordBound = 0;
|
||||
string nextWord = GetNextWord(statement, out wordBound);
|
||||
switch (nextWord)
|
||||
string value;
|
||||
advance = FindValue(source, out value);
|
||||
if (advance < 0)
|
||||
{
|
||||
case "append":
|
||||
FindIdentifier();
|
||||
FindExpression();
|
||||
break;
|
||||
case "list":
|
||||
break;
|
||||
case "exit":
|
||||
Environment.Exit(0);
|
||||
break;
|
||||
case "print":
|
||||
break;
|
||||
case "printlength":
|
||||
break;
|
||||
case "printwords":
|
||||
break;
|
||||
case "printwordcount":
|
||||
break;
|
||||
case "set":
|
||||
break;
|
||||
case "reverse":
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
// Error on parsing value
|
||||
return false;
|
||||
}
|
||||
|
||||
// Look for further elements
|
||||
return 1;
|
||||
else
|
||||
{
|
||||
source.Position = advance;
|
||||
}
|
||||
string eol;
|
||||
FindNextWord(source, out eol);
|
||||
if (eol[0] != ';')
|
||||
{
|
||||
// Expected end-of-statement/end-of-line (;)
|
||||
return false;
|
||||
}
|
||||
if (Symbols[key].Item2 == VariableFlags.Reserved)
|
||||
{
|
||||
// Can't assign to reserved items
|
||||
return false;
|
||||
}
|
||||
Symbols[key] = new Tuple<string, VariableFlags>(Symbols[key].Item1 + value, Symbols[key].Item2);
|
||||
return true;
|
||||
}
|
||||
|
||||
int FindIdentifier(Stream s)
|
||||
#endregion
|
||||
#region Data Handling
|
||||
// Data Handling
|
||||
/// <summary>
/// Parses an expression of the form <c>value { "+" value }</c> starting at the
/// current stream position and concatenates the text of every value.
/// </summary>
/// <param name="s">Seekable stream to read from. Its position is restored before returning.</param>
/// <param name="expression">The concatenated text, or an empty string on failure.</param>
/// <returns>
/// The end position reported by FindValue for the last value of the expression,
/// or a value &lt;0 if the first value, or a value following a "+", could not be parsed.
/// </returns>
long FindExpression(Stream s, out string expression)
{
    long start = s.Position;
    expression = "";

    // An expression must contain at least one value.
    string value;
    long end = FindValue(s, out value);
    if (end < 0)
    {
        s.Position = start;
        return end;
    }

    StringBuilder text = new StringBuilder(value);
    while (true)
    {
        // Continue from the end of the value just consumed and step over the
        // separating whitespace so the next word starts on a significant byte.
        s.Position = end;
        int next = s.ReadByte();
        while (next > -1 && char.IsWhiteSpace((char)next))
        {
            next = s.ReadByte();
        }
        if (next < 0)
        {
            break; // nothing follows the value
        }
        s.Position--;

        string op;
        long opEnd = FindNextWord(s, out op);
        if (!(opEnd > 0 && op == "+"))
        {
            break; // whatever follows belongs to the caller (e.g. the terminator)
        }

        // The operand after "+" is a full value (literal or identifier), not a raw word.
        s.Position = opEnd;
        long valueEnd = FindValue(s, out value);
        if (valueEnd < 0)
        {
            s.Position = start;
            return valueEnd;
        }

        text.Append(value);
        end = valueEnd;
    }

    expression = text.ToString();
    s.Position = start;
    return end;
}
|
||||
|
||||
|
||||
// Most atomic unit is 'value':
|
||||
/// <summary>
/// Finds the next value in the stream. A value is either a string literal
/// introduced by a double quote, or the name of a variable in the symbol table.
/// </summary>
/// <param name="s">Seekable stream to read from. Its position is restored before returning.</param>
/// <param name="returnedValue">The literal text or the variable's current text; empty on failure.</param>
/// <returns>The end position reported by FindLiteral or FindIdentifier, or a value &lt;0 on failure.</returns>
long FindValue(Stream s, out string returnedValue)
{
    long start = s.Position;
    returnedValue = "";

    // Skip leading whitespace one byte at a time so that the first
    // significant byte is known exactly and nothing else is consumed.
    int first = s.ReadByte();
    while (first > -1 && char.IsWhiteSpace((char)first))
    {
        first = s.ReadByte();
    }

    long end;
    if (first < 0)
    {
        // Only whitespace (or nothing) was left in the stream.
        end = -1;
    }
    else if (first == '\"')
    {
        // The opening quote has been consumed; the literal body starts here.
        end = FindLiteral(s, out returnedValue);
    }
    else
    {
        // The byte just read is the first character of an identifier: un-read it.
        s.Position--;

        string key;
        end = FindIdentifier(s, out key);

        Tuple<string, VariableFlags> entry;
        if (end >= 0 && Symbols.TryGetValue(key, out entry))
        {
            returnedValue = entry.Item1;
        }
        else if (end >= 0)
        {
            end = -1;
        }
    }

    s.Position = start;
    return end;
}

/// <summary>
/// Reads the next word from the stream and checks that it names an entry in the symbol table.
/// </summary>
/// <param name="s">Stream to read from.</param>
/// <param name="returnedKey">The variable name (the symbol-table key), or an empty string if it is not defined.</param>
/// <returns>The end position reported by FindNextWord, or -1 if no such variable is defined.</returns>
long FindIdentifier(Stream s, out string returnedKey)
{
    string identifier;
    long wordEnd = FindNextWord(s, out identifier);

    // Callers index the symbol table with the returned string, so the key itself
    // is handed back rather than the value stored under it.
    if (identifier == null || !Symbols.ContainsKey(identifier))
    {
        Console.WriteLine("Could not find a defined variable with the name {0}", identifier);
        returnedKey = "";
        return -1;
    }

    returnedKey = identifier;
    return wordEnd;
}
|
||||
|
||||
/// <summary>
/// Reads the body of a string literal. The stream must be positioned just
/// after the opening double quote.
/// </summary>
/// <param name="s">Stream to read from.</param>
/// <param name="returnedLiteral">
/// The raw text between the quotes, or an empty string if the search failed.
/// Escape sequences are left as written (backslashes are not removed).
/// </param>
/// <returns>The value returned by FindNextOccurance for the closing quote; a negative value is treated as failure.</returns>
long FindLiteral(Stream s, out string returnedLiteral)
{
    // Tracks whether the previous character was an unescaped backslash, so that
    // \" does not end the literal while \\" does.
    bool escaped = false;

    string literal;
    long end = FindNextOccurance(s, (current, stream) =>
    {
        if (escaped)
        {
            // This character is the target of an escape; it can never terminate.
            escaped = false;
            return false;
        }
        if (current == '\\')
        {
            escaped = true;
            return false;
        }
        return current == '\"';
    }, out literal);

    returnedLiteral = end > -1 ? literal : "";
    return end;
}
|
||||
#endregion
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/// <summary>
|
||||
@ -107,52 +237,17 @@ namespace Assignment_1
|
||||
/// <returns></returns>
|
||||
static string GetNextLine(Stream s)
|
||||
{
|
||||
long start = s.Position;
|
||||
StringBuilder sb = new StringBuilder();
|
||||
bool newLineFound = false;
|
||||
while (!newLineFound)
|
||||
{
|
||||
// As UTF-8 allows codepoints to span multiple bytes, reading a single byte as a character will not always give the expected
|
||||
// value.
|
||||
// Fortunately, the standard ASCII table is 7-bits long. The 8th bit is used to determine the character size
|
||||
int readAmount = 0;
|
||||
int firstChar = s.ReadByte();
|
||||
if ((firstChar >> 3) == 0x1E) // 11110xxx implies a 4-byte length character
|
||||
{
|
||||
readAmount = 3;
|
||||
}
|
||||
else if((firstChar >> 4) == 0xE) // 1110xxxx, 3-byte
|
||||
{
|
||||
readAmount = 2;
|
||||
}
|
||||
else if ((firstChar >> 5) == 0x6) // 110xxxxx, 2-byte
|
||||
{
|
||||
readAmount = 1;
|
||||
}
|
||||
|
||||
byte[] charBytes = new byte[readAmount + 1];
|
||||
charBytes[0] = (byte)firstChar;
|
||||
for (int i = 1; i < readAmount; i++)
|
||||
{
|
||||
int nextChar = s.ReadByte();
|
||||
if (nextChar >> 6 != 2) throw new Exception("Character is not a valid UTF-8 code point!");
|
||||
charBytes[i] = (byte)nextChar;
|
||||
}
|
||||
string converted = Encoding.UTF8.GetString(charBytes);
|
||||
if (converted == "\r" || converted == "\n")
|
||||
{
|
||||
if (s.ReadByte() != '\n') s.Position--; // Return the position if the next character isn't a new line
|
||||
newLineFound = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
sb.Append(converted);
|
||||
}
|
||||
}
|
||||
return sb.ToString();
|
||||
string nextLine;
|
||||
FindNextOccurance(s, '\n', out nextLine);
|
||||
return nextLine;
|
||||
}
|
||||
|
||||
static string GetNextWord(string s, out int wordEndPos)
|
||||
/// <summary>
|
||||
/// Finds the next word in the string
|
||||
/// </summary>
|
||||
/// <param name="s"></param>
|
||||
/// <param name="nextWord"></param>
|
||||
/// <returns>A value <0 if an error occurred, else the position of the end of the word</returns>
|
||||
static long FindNextWord(string s, out string nextWord)
|
||||
{
|
||||
// remove whitespace from the start
|
||||
int wordStart = 0;
|
||||
@ -170,8 +265,110 @@ namespace Assignment_1
|
||||
if (char.IsWhiteSpace(s[i])) break;
|
||||
wordEnd = i;
|
||||
}
|
||||
wordEndPos = wordEnd;
|
||||
return s.Substring(wordStart, wordEnd);
|
||||
|
||||
nextWord = s.Substring(wordStart, wordEnd);
|
||||
return wordEnd;
|
||||
}
|
||||
/// <summary>
/// Finds the end-boundary of the next whitespace-delimited word in the stream.
/// Leading whitespace is skipped first, matching the string overload.
/// </summary>
/// <param name="s">Seekable stream to read from. Its position is restored before returning.</param>
/// <param name="nextWord">The word that was found, or an empty string when only whitespace remained.</param>
/// <returns>
/// The position reported by FindNextOccurance for the whitespace that ends the word,
/// the stream length when the word runs to the end of the data,
/// or a value &lt;0 when no word was found.
/// </returns>
static long FindNextWord(Stream s, out string nextWord)
{
    long start = s.Position;

    // Step over leading whitespace so an empty word is never reported merely
    // because the stream was positioned on a separator.
    int lead = s.ReadByte();
    while (lead > -1 && char.IsWhiteSpace((char)lead))
    {
        lead = s.ReadByte();
    }

    if (lead < 0)
    {
        nextWord = "";
        s.Position = start;
        return -1;
    }

    // Un-read the first byte of the word, then scan to the next whitespace.
    s.Position--;
    long wordEnd = FindNextOccurance(s, (c, stream) => char.IsWhiteSpace(c), out nextWord);

    if (wordEnd < 0 && nextWord.Length > 0)
    {
        // The search reported failure but text was collected: the word ran to the
        // end of the data, which also counts as a word boundary.
        wordEnd = s.Length;
    }

    s.Position = start;
    return wordEnd;
}
|
||||
|
||||
|
||||
/// <summary>
/// Scans forward from the current position until the predicate returns true,
/// collecting every character for which it returned false.
/// </summary>
/// <param name="s">Seekable stream to scan. Its position is restored to where it started before returning.</param>
/// <param name="p">
/// Predicate invoked with each character and the stream (positioned just after that character).
/// </param>
/// <param name="result">The characters read before the match, or everything up to the end of the stream if nothing matched.</param>
/// <returns>The stream position just after the matching character, or -1 if the end of the stream was reached first.</returns>
static long FindNextOccurance(Stream s, Func<char, Stream, bool> p, out string result)
{
    long start = s.Position;
    StringBuilder collected = new StringBuilder();
    long matchEnd = -1;

    // Stop at the end of the data: without this bound the loop never terminates
    // when the predicate does not match anything that remains.
    while (s.Position < s.Length)
    {
        char current = GetChar(s);
        if (p(current, s))
        {
            matchEnd = s.Position;
            break;
        }
        collected.Append(current);
    }

    result = collected.ToString();
    s.Position = start;
    return matchEnd;
}
|
||||
|
||||
/// <summary>
/// Locates the next occurrence of a specific character by delegating to the
/// predicate-based overload.
/// </summary>
/// <param name="s">Stream to search.</param>
/// <param name="c">Character to look for.</param>
/// <param name="result">Receives the text read while searching for the character.</param>
/// <returns>Whatever the predicate-based overload returns for a predicate that matches exactly <paramref name="c"/>.</returns>
static long FindNextOccurance(Stream s, char c, out string result)
{
    Func<char, Stream, bool> isTarget = (candidate, stream) => candidate == c;
    return FindNextOccurance(s, isTarget, out result);
}
|
||||
|
||||
/// <summary>
/// Reads one UTF-8 encoded character from the stream.
/// </summary>
/// <remarks>
/// UTF-8 code points may span several bytes. The high bits of the first byte give
/// the sequence length; every following byte must have the form 10xxxxxx.
/// The return type is a single UTF-16 unit, so for a 4-byte sequence only the
/// first unit of the decoded string is returned.
/// </remarks>
/// <param name="s">Stream to read from; it is advanced past the whole sequence.</param>
/// <returns>The first UTF-16 unit of the decoded character.</returns>
/// <exception cref="EndOfStreamException">The stream was already at its end.</exception>
/// <exception cref="InvalidDataException">A continuation byte was missing or malformed.</exception>
static char GetChar(Stream s)
{
    int lead = s.ReadByte();
    if (lead < 0)
    {
        // ReadByte signals end of stream with -1; decoding that as a byte would
        // silently produce a replacement character.
        throw new EndOfStreamException("Attempted to read a character past the end of the stream.");
    }

    int continuationCount = 0;
    if ((lead >> 3) == 0x1E)        // 11110xxx, 4-byte sequence
    {
        continuationCount = 3;
    }
    else if ((lead >> 4) == 0xE)    // 1110xxxx, 3-byte sequence
    {
        continuationCount = 2;
    }
    else if ((lead >> 5) == 0x6)    // 110xxxxx, 2-byte sequence
    {
        continuationCount = 1;
    }

    byte[] encoded = new byte[continuationCount + 1];
    encoded[0] = (byte)lead;

    // Inclusive bound: all continuation bytes must be read, including the last.
    for (int i = 1; i <= continuationCount; i++)
    {
        int continuation = s.ReadByte();
        if (continuation >> 6 != 2)
        {
            throw new InvalidDataException("Character is not a valid UTF-8 code point!");
        }
        encoded[i] = (byte)continuation;
    }

    return Encoding.UTF8.GetString(encoded)[0];
}
|
||||
|
||||
/// <summary>
/// Advances the stream past any whitespace bytes, leaving it positioned on the
/// first non-whitespace byte (or at the end of the stream if there is none).
/// </summary>
/// <param name="s">Seekable stream to advance.</param>
static void SkipWhitespace(Stream s)
{
    int current;
    do
    {
        current = s.ReadByte();
    }
    while (current > -1 && char.IsWhiteSpace((char)current));

    if (current > -1)
    {
        // The loop necessarily reads one significant byte to know where the
        // whitespace ends; hand that byte back to the caller.
        s.Position--;
    }
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user