Commit 2b7cb4c3 authored by Ardavan_Roozkhosh's avatar Ardavan_Roozkhosh

Meant for simplifying context-free grammars.

parent d834ee01
Pipeline #2739 failed with stages
import java.util.ArrayList;
import java.util.List;
/**
 * In-memory representation of a grammar: a list of states (one per non-terminal,
 * each holding its derivations), a list of terminal characters, and a start state.
 *
 * <p>All three fields start out {@code null} (there is no constructor that
 * initializes them), so the corresponding setters must be called before the
 * methods that read them are used.
 */
public class Grammar {
    /**
     * List of all the states within this Grammar.
     */
    private List<State> states;

    /**
     * List of all possible terminals (lowercase letters) within this Grammar.
     */
    private List<Character> terminals;

    /**
     * Pointer to the start state for this Grammar.
     */
    private State startState;

    /**
     * Gets the current list of states for this Grammar.
     * @return the list of states (the internal list itself, not a copy)
     */
    public List<State> getStates() {
        return states;
    }

    /**
     * Sets a new list of states for this Grammar.
     * @param states the new state list to be set
     */
    public void setStates(List<State> states) {
        this.states = states;
    }

    /**
     * Removes the given state from this Grammar's list of states, if present.
     * @param toRemove the state to remove
     * @return the list of states after the removal (the internal list itself)
     */
    public List<State> removeState(State toRemove) {
        states.remove(toRemove);
        return states;
    }

    /**
     * Gets the current list of terminals for this Grammar.
     * @return the list of terminals (the internal list itself, not a copy)
     */
    public List<Character> getTerminals() {
        return terminals;
    }

    /**
     * Sets a new list of terminals for this Grammar.
     * @param terminals the new terminals to be set
     */
    public void setTerminals(List<Character> terminals) {
        this.terminals = terminals;
    }

    /**
     * Gets the current start state of this Grammar.
     * @return the current start state
     */
    public State getStartState() {
        return startState;
    }

    /**
     * Sets a new start state for this Grammar.
     * @param startState the new start state to be set
     */
    public void setStartState(State startState) {
        this.startState = startState;
    }

    /**
     * Searches through the current list of states for the state with the name specified
     * by the input char.
     * @param stateName the name of the state to search for
     * @return the requested state, or null if the state is not in the current list of states
     */
    public State getStateWithName(char stateName) {
        for (State state : states) {
            if (state.getNonTerminal() == stateName) {
                return state;
            }
        }
        return null;
    }

    /**
     * Removes every state whose list of derivations is empty.
     * If the grammar has no states at all, a notice is printed to standard output instead.
     */
    public void removeEmptyStates() {
        if (states.isEmpty()) {
            System.out.println("There are no states in this grammar, so no empty ones were removed.");
            return;
        }
        // Walk from the back: removing index i only shifts elements that were
        // already examined, so no state is skipped (a forward index loop would
        // skip the element that slides into the freed slot).
        for (int indexOfState = states.size() - 1; indexOfState >= 0; indexOfState--) {
            if (states.get(indexOfState).getDerivations().isEmpty()) {
                states.remove(indexOfState);
            }
        }
    }

    /**
     * Builds and returns a string representing this Grammar in a form similar to the input file format.
     * States and terminals are separated by ", ", and the derivations of each rule by "|".
     * @return the string representation of the Grammar
     */
    @Override
    public String toString() {
        StringBuilder objectString = new StringBuilder("States: ");

        // Separators are driven by position ("not the first element") rather than
        // by indexOf(), which misplaces them when a list contains equal elements.
        boolean first = true;
        for (State state : states) {
            if (!first) {
                objectString.append(", ");
            }
            objectString.append(state.getNonTerminal());
            first = false;
        }

        objectString.append("\n").append("Terminals: ");
        first = true;
        for (Character terminal : terminals) {
            if (!first) {
                objectString.append(", ");
            }
            objectString.append(terminal);
            first = false;
        }

        objectString.append("\n").append("Start State: ").append(startState.getNonTerminal())
                .append("\n").append("Rules: ").append("\n");

        // One line per state: "<non-terminal>: <derivation>|<derivation>|..."
        for (State state : states) {
            objectString.append(state.getNonTerminal()).append(": ");
            first = true;
            // Iterate the derivations directly; no cast to a concrete list type is needed.
            for (String derivation : state.getDerivations()) {
                if (!first) {
                    objectString.append("|");
                }
                objectString.append(derivation);
                first = false;
            }
            objectString.append("\n");
        }
        return objectString.toString();
    }
}
/**
 * Removes all of the unproductive and unreachable states from a grammar.
 *
 * <p>The grammar is modified in place in four steps:
 * <ol>
 *   <li>find every productive symbol (terminals, plus non-terminals that have at
 *       least one derivation made up only of productive symbols);</li>
 *   <li>delete every derivation that mentions a non-terminal that is not productive,
 *       then drop the states that are not productive;</li>
 *   <li>keep only the states that can be reached from the start state;</li>
 *   <li>reset the terminal list to the lowercase characters still used by the
 *       remaining derivations.</li>
 * </ol>
 * The start state, when set, is always kept in the resulting list of states.
 *
 * @author ardavan roozkhosh
 * @param cfg the grammar which will have its useless states removed
 */
private void removeUselessStates(Grammar cfg) {
    ArrayList<Character> productive = findProductiveSymbols(cfg);

    // Drop every derivation that can never produce a terminal string.
    // This must happen before the state list is replaced, because
    // isUselessSymbol() looks non-terminals up in the current state list.
    for (State state : cfg.getStates()) {
        ArrayList<String> doomed = new ArrayList<String>();
        for (String derivation : state.getDerivations()) {
            for (char symbol : derivation.toCharArray()) {
                if (isUselessSymbol(symbol, cfg, productive)) {
                    doomed.add(derivation);
                    break;
                }
            }
        }
        state.getDerivations().removeAll(doomed);
    }

    // Keep only productive states (by convention non-terminals are 'A'-'Z').
    ArrayList<State> productiveStates = new ArrayList<State>();
    for (State state : cfg.getStates()) {
        char name = state.getNonTerminal();
        if (Character.isUpperCase(name) && productive.contains(name)) {
            productiveStates.add(state);
        }
    }
    cfg.setStates(productiveStates);

    // Of the productive states, keep only those reachable from the start state.
    cfg.setStates(findReachableStates(cfg));

    // A terminal that only appeared in deleted derivations or removed states
    // is no longer part of the grammar, so rebuild the terminal list.
    cfg.setTerminals(collectUsedTerminals(cfg));
}

/**
 * Computes the productive symbols of a grammar: all of its terminals plus every
 * non-terminal with at least one derivation consisting only of productive symbols.
 * An empty derivation counts as productive.
 */
private ArrayList<Character> findProductiveSymbols(Grammar cfg) {
    ArrayList<Character> productive = new ArrayList<Character>(cfg.getTerminals());
    // Iterate to a fixed point. Every pass that sets 'changed' adds a new
    // non-terminal, so the loop ends after at most (number of states + 1) passes;
    // no artificial iteration cap is needed.
    boolean changed = true;
    while (changed) {
        changed = false;
        for (State state : cfg.getStates()) {
            if (productive.contains(state.getNonTerminal())) {
                continue;
            }
            for (String derivation : state.getDerivations()) {
                boolean allProductive = true;
                for (char symbol : derivation.toCharArray()) {
                    if (!productive.contains(symbol)) {
                        allProductive = false;
                        break;
                    }
                }
                if (allProductive) {
                    productive.add(state.getNonTerminal());
                    changed = true;
                    break;
                }
            }
        }
    }
    return productive;
}

/**
 * Tells whether a symbol makes a derivation useless: it is not productive and is
 * either the name of a state of the grammar or an uppercase character
 * (a non-terminal that has no state defining it).
 */
private boolean isUselessSymbol(char symbol, Grammar cfg, ArrayList<Character> productive) {
    if (productive.contains(symbol)) {
        return false;
    }
    return Character.isUpperCase(symbol) || cfg.getStateWithName(symbol) != null;
}

/**
 * Collects the states reachable from the start state by following the uppercase
 * symbols of derivations, looking each one up in the grammar's current state list.
 */
private ArrayList<State> findReachableStates(Grammar cfg) {
    ArrayList<State> reachable = new ArrayList<State>();
    if (cfg.getStartState() != null) {
        reachable.add(cfg.getStartState());
    }
    // The list doubles as a work queue: it grows while it is scanned, so states
    // discovered late still get their own derivations explored.
    for (int next = 0; next < reachable.size(); next++) {
        for (String derivation : reachable.get(next).getDerivations()) {
            for (char symbol : derivation.toCharArray()) {
                if (!Character.isUpperCase(symbol)) {
                    continue;
                }
                State target = cfg.getStateWithName(symbol);
                // target is null when the symbol has no state in the grammar.
                if (target != null && !reachable.contains(target)) {
                    reachable.add(target);
                }
            }
        }
    }
    return reachable;
}

/**
 * Collects, without duplicates, the lowercase characters that appear in the
 * derivations of the grammar's current states.
 */
private ArrayList<Character> collectUsedTerminals(Grammar cfg) {
    ArrayList<Character> usedTerminals = new ArrayList<Character>();
    for (State state : cfg.getStates()) {
        for (String derivation : state.getDerivations()) {
            for (char symbol : derivation.toCharArray()) {
                if (Character.isLowerCase(symbol) && !usedTerminals.contains(symbol)) {
                    usedTerminals.add(symbol);
                }
            }
        }
    }
    return usedTerminals;
}
/**
 * Simplifies a grammar in place and hands the same instance back.
 * The only step this method invokes is {@code removeUselessStates}; it does not
 * itself call any epsilon-derivation or unit-production removal.
 * @param cfg the grammar to be simplified (modified by this call)
 * @return the grammar that was passed in, after simplification
 */
public Grammar simplify(Grammar cfg) {
    final Grammar simplified = cfg;
    this.removeUselessStates(simplified);
    return simplified;
}
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment