/*
 * Constants and data types for a grid-based Q-learning agent.
 *
 * NOTE: every preprocessor directive must sit on its own line. A directive
 * extends to the end of the physical line, so placing several "#define"s
 * (or declarations) after one another on a single line folds all of them
 * into the replacement list of the first macro.
 */

/* Grid dimensions; used below as the array extents [VGrids][HGrids]. */
#define HGrids 9
#define VGrids 6

/* NOTE(review): presumably the number of steps between progress displays;
 * confirm against the code that uses it. */
#define ShowStepInterval 1000

/* Character codes for grid cells (names suggest: empty, start, goal,
 * obstacle). */
#define W_Empty ' '
#define W_Start 'S'
#define W_Goal 'G'
#define W_Obstacle 'O'

/* Action identifiers; values are 0..NActions-1 so they can index the
 * per-action dimension of the tables in Agent. */
#define NActions 4
#define ActionUp 0
#define ActionRight 1
#define ActionDown 2
#define ActionLeft 3

/* Capacity of Agent.history. */
#define HistoryLength 100

/*
 * One recorded entry of the agent's history: a position (x, y), an action
 * identifier (a) and the associated reward.
 */
typedef struct {
    short x, y, a;
    double reward;
} Episode;

/*
 * Complete state of a learning agent.
 *
 * All tables are indexed [row][column][action] with extents
 * [VGrids][HGrids][NActions].
 */
typedef struct {
    short x, y;       /* NOTE(review): presumably the current position */
    short newx, newy; /* NOTE(review): presumably the position after acting */
    double reward;
    short a;          /* action identifier (see Action* constants) */
    short hindex;     /* NOTE(review): presumably a cursor into history[] */
    short hsize;      /* NOTE(review): presumably the number of valid
                       * entries in history[] */
    Episode history[HistoryLength];

    /* Two shorts per (cell, action); NOTE(review): presumably the modelled
     * successor coordinates -- confirm the order of the pair at use sites. */
    short WModel[VGrids][HGrids][NActions][2];

    /* One reward value per (cell, action) for the world model. */
    double WModelReward[VGrids][HGrids][NActions];

    /* One action-value estimate per (cell, action). */
    double QTable[VGrids][HGrids][NActions];
} Agent;

/* Selects which learning algorithm variant is used. */
typedef enum {
    OneStepQ,
    DynaQ,
    BackPropQ
} LearningStrategy;