/*
 * Grid dimensions. Both are used as array extents in the Agent tables,
 * indexed as [VGrids][HGrids] (VGrids first, HGrids second).
 */
#define	HGrids	9
#define	VGrids	6
/* NOTE(review): presumably the number of steps between progress displays;
 * the consumer is not visible in this part of the file -- confirm. */
#define	ShowStepInterval	1000

/*
 * Character codes for the four kinds of world cell.
 * NOTE(review): presumably the contents of a world-map array defined
 * elsewhere; that map is not visible here -- confirm.
 */
#define	W_Empty		' '
#define	W_Start		'S'
#define	W_Goal		'G'
#define	W_Obstacle	'O'

/*
 * Action identifiers, numbered 0..NActions-1. NActions is the extent of
 * the last dimension of the Agent's Q table and of the action dimension of
 * its world-model tables, so these values are presumably used as indices
 * into that dimension (verify against callers).
 */
#define	NActions	4
#define	ActionUp	0
#define	ActionRight	1
#define	ActionDown	2
#define	ActionLeft	3

/* Number of Episode records held in Agent.history. */
#define	HistoryLength	100

/*
 * One entry of the agent's history buffer: a grid position, an action
 * code and a reward value.
 * NOTE(review): presumably the state the action was taken in and the
 * reward received for it -- confirm against the code that fills it in.
 */
typedef struct {
	short x;        /* grid coordinate */
	short y;        /* grid coordinate */
	short a;        /* action code */
	double reward;  /* reward value stored with this entry */
} Episode;

/*
 * Complete state of one learning agent: position fields, the most recent
 * reward and action, a fixed-size history of Episode records, and three
 * tables indexed by [VGrids][HGrids][NActions].
 *
 * Member order is significant for positional initializers and for the
 * in-memory layout; keep it as is.
 */
typedef struct {
	/* Position fields. NOTE(review): x/y presumably hold the current cell
	 * and newx/newy the cell after the chosen action -- confirm. */
	short x;
	short y;
	short newx;
	short newy;

	double reward;  /* reward value */
	short a;        /* action code */

	/* NOTE(review): presumably a cursor into history[] and the number of
	 * valid entries in it -- confirm against the code that updates them. */
	short hindex;
	short hsize;

	Episode history[HistoryLength];

	/* Per (row, column, action): a pair of shorts. NOTE(review): presumably
	 * the resulting cell coordinates learned by the world model -- confirm
	 * which element is x and which is y. */
	short WModel[VGrids][HGrids][NActions][2];

	/* Per (row, column, action): a reward value held alongside WModel. */
	double WModelReward[VGrids][HGrids][NActions];

	/* Per (row, column, action): action-value estimate. */
	double QTable[VGrids][HGrids][NActions];
} Agent;

/*
 * Selects which learning algorithm is run.
 * NOTE(review): the names suggest plain one-step Q-learning, Dyna-Q
 * (model-based planning) and a variant that propagates values back through
 * the recorded history; the implementations are not visible here -- confirm.
 *
 * The numeric values are the ones the compiler assigns by default, written
 * out explicitly.
 */
typedef enum {
	OneStepQ  = 0,
	DynaQ     = 1,
	BackPropQ = 2
} LearningStrategy;