%---------------------------------------
% Foundations of Language Interaction
% Program One: Basic Agents
% M. Stone, 5-24-01
% mdstone@cs.rutgers.edu
%---------------------------------------

%---------------------------------------
% A Basic Agent Simulation Loop.
% Generic perceive/choose/execute cycle; concrete agents
% (modules A1-A4 below) plug in by supplying their own
% CHOOSE clauses plus percept and action constants.
%---------------------------------------
module BasicAgent.

%---------------------------------------
% Data structures for deliberation
%   Agent gets a PERCEPT
%   Agent chooses an ACTION
%   Agent's STATE records its history
%     start:      initial state
%     (see P S):  just saw percept P in state S
%     (do A S):   just did action A in state S
% A state is thus a backward-linked record of everything
% the agent has seen and done since START.
%---------------------------------------
kind percept, action, state type.

type start state.
type see percept -> state -> state.
type do action -> state -> state.

%---------------------------------------
% Relations for agent simulation
%---------------------------------------

%---------------------------------------
type perceive state -> percept -> o.

% (perceive S P) is true if the agent observes P in state S.
% Implementation uses side-effects to query the user: prompt
% on standard output, read one line, and parse that line into
% the percept term P.
% NOTE(review): assumes the typed line parses as a percept
% term (e.g. "monster"); no error handling if it does not --
% confirm against the string_to_term builtin's failure mode.
perceive State P :-
    output std_out "What does the agent see?\n",
    input_line std_in L,
    string_to_term L P.

%---------------------------------------
type execute state -> action -> o.

% (execute S A) is always true, but handles any side effects
% of executing A in S.
% Implementation just prints out information: the action term
% A is rendered to a string and echoed to standard output.
execute State A :-
    output std_out "Agent performs action ",
    term_to_string A S,
    output std_out S,
    output std_out ".\n\n".

%---------------------------------------
type choose state -> action -> o.

% (choose S A) is defined for each particular domain and
% agent.  It's true if the agent may choose to do action A
% in state S.

%---------------------------------------
type agitate state -> o.

% (agitate S) is always true - provided PERCEIVE and CHOOSE
% relate each state to a percept or action.
% As a side-effect, agitate S simulates the complete future
% history of the agent, starting from state S.
% Cut commits the agent to its first choice, preventing time
% from flowing backward: once an action has been executed,
% backtracking must never revisit the decision.
agitate State :-
    perceive State Percept,
    choose (see Percept State) Action,
    !,
    execute (see Percept State) Action,
    agitate (do Action (see Percept State)).

%%%%%

%---------------------------------------
% Implementation One.
% A Symbolic, Reactive Agent
% Video Game Domain
%
% The simplest agent: a direct lookup table from the current
% percept to an action.  The history S is ignored entirely.
%---------------------------------------
module A1.

accumulate BasicAgent.

%---------------------------------------
% What you can see.
%---------------------------------------
type monster percept.
type jewel percept.
type nothing percept.

%---------------------------------------
% What you can do.
%---------------------------------------
type shoot action.
type pickup action.
type move action.

%---------------------------------------
% What you do do.
% One reflex per percept.
%---------------------------------------
choose (see monster S) shoot.
choose (see jewel S) pickup.
choose (see nothing S) move.

%%%%%%

%---------------------------------------
% Higher-order mathematical programming
%---------------------------------------
module HOM.

%---------------------------------------
type sum list A -> (A -> real -> o) -> real -> o.
type sumtr list A -> (A -> real -> o) -> real -> real -> o.

% (sum L F V) is true if V is the sum for a in L of F a.
% Assumes F is "really" a function from A to real (relates
% each element to exactly one real value).
% SUMTR is the accumulator-passing worker: (sumtr L F S V)
% holds when V is S plus the F-value of every element of L.
sumtr nil F V V.
sumtr (H::R) F S V :-
    F H O,
    S1 is S r+ O,
    sumtr R F S1 V.

sum L F V :- sumtr L F 0.0 V.

%---------------------------------------
type argmax list A -> (A -> real -> o) -> A -> o.
type bigger A -> real -> A -> real -> A -> real -> o.
type argmaxtr list A -> (A -> real -> o) -> A -> real -> A -> o.

% (argmax L F A) is true if A is that element E of L
% for which F E is largest.
% (bigger A U B V M W) picks whichever of the scored pairs
% (A,U) and (B,V) has the larger score; the cut makes ties
% resolve to the second pair.
bigger A U B V A U :- U r> V, !.
bigger A U B V B V.

% (argmaxtr L F A U M): M is the best of the running champion
% A (with score U) and the elements of L, scored by F.
argmaxtr nil F A U A.
argmaxtr (H::R) F A U M :-
    F H V,
    bigger A U H V N W,
    argmaxtr R F N W M.

% Seed the running maximum with the first element's score;
% ARGMAX therefore fails on the empty list.
argmax (H::R) F A :-
    F H V,
    argmaxtr R F H V A.
%---------------------------------------
type mappred (A -> B -> o) -> (list A) -> (list B) -> o.

% mappred P (X1::X2::...::Xn) (Y1::Y2::...::Yn)
% Succeeds if the predicate P relates each Xi to Yi.
% (The two lists must therefore have the same length.)
mappred P nil nil.
mappred P (X :: L) (Y :: K) :-
    P X Y,
    mappred P L K.

%%%%%%

%---------------------------------------
% Implementation Two.
% A Bayesian, Reactive Agent
% Video Game Domain
%
% Percepts are now noisy evidence about a hidden variable;
% the agent picks the action with greatest expected utility
% under its probabilistic model.
%---------------------------------------
module A2.

accumulate BasicAgent.
accumulate HOM.

%---------------------------------------
% Uncertainty is represented by a hidden variable
%---------------------------------------
kind variable type.

%---------------------------------------
% How things seem.
%---------------------------------------
type monstery percept.
type jewely percept.
type nothingy percept.

%---------------------------------------
% How things really are.
%---------------------------------------
type monster variable.
type jewel variable.
type nothing variable.

%---------------------------------------
% What you can do.
%---------------------------------------
type shoot action.
type pickup action.
type move action.

%---------------------------------------
% Knowledge about the environment,
% in a Bayesian framework.
%   (prior V P):        P is the prior probability of V.
%   (likelihood O V P): P is the probability of perceiving
%                       O when the world really is V.
%   (u A V U):          U is the utility of doing A when
%                       the world really is V.
%---------------------------------------
type prior variable -> real -> o.
type likelihood percept -> variable -> real -> o.
type u action -> variable -> real -> o.

prior nothing 0.6.
prior jewel 0.2.
prior monster 0.2.

% Each percept is strong evidence for its matching hidden
% state; the cuts make the last clause a catch-all assigning
% every mismatched percept/state pair likelihood 0.1.
likelihood monstery monster 0.8 :- !.
likelihood jewely jewel 0.8 :- !.
likelihood nothingy nothing 0.8 :- !.
likelihood Percept Var 0.1.

u shoot monster 0.5.
u move monster 0.3.
u pickup monster 0.0.
u shoot jewel 0.5.
u move jewel 0.5.
u pickup jewel 0.8.
u shoot nothing 0.4.
u pickup nothing 0.6.
u move nothing 0.6.

%---------------------------------------
type non_norm_u_c variable -> percept -> action -> real -> o.
% (non_norm_u_c V P A R) is true if R is the contribution to
% the utility for a case when the true state is V, we
% perceive P, we choose action A -- given our probabilistic
% model.  R = utility * prior * likelihood; the posterior
% weighting is deliberately left non-normalized, since
% normalizing cannot change which action maximizes it.
non_norm_u_c V O A R :-
    prior V P1,
    likelihood O V P2,
    u A V U,
    R is U r* P1 r* P2.

%---------------------------------------
% We implement choose by picking whichever of the possible
% actions maximizes our utility, summed over all possible
% hidden states, weighted by their (non-normalized)
% posterior.
% NOTE(review): the bound variable u shadows the utility
% constant u declared above, and the inner h\ u\ shadows the
% outer a\ u\.  Both shadowings are harmless here (the
% constant u is only reached through non_norm_u_c, and the
% sum's final argument u sits outside the inner lambda's
% scope) but they are easy to misread.
choose (see Percept State) Action :-
    argmax (shoot::pickup::move::nil)
           (a\ u\ (sum (monster::jewel::nothing::nil)
                       (h\ u\ non_norm_u_c h Percept a u)
                       u))
           Action.

%%%%%%

%---------------------------------------
% Implementation Three.
% A Reactive Agent with Logical Belief & Desire
% Video Game Domain
%---------------------------------------
module A3.

accumulate BasicAgent.

%---------------------------------------
% The usual suspects.
%---------------------------------------
type monster percept.
type jewel percept.
type nothing percept.

type shoot action.
type pickup action.
type move action.

%---------------------------------------
% Beliefs & Desire
% The agent's beliefs and desires characterize states.
% (This is called a fluent in AI.)
% The qualitative characterization of states means that this
% agent "understands" its environment better than agents 1
% and 2 - although we'll see that this understanding can't
% necessarily translate into better decisions.
%---------------------------------------

%---------------------------------------
% Specific fluents.
% danger and safety
%---------------------------------------
type in_danger state -> o.
type safe state -> o.

% You're in danger as long as there's an unshot monster:
% seeing one puts you in danger, and moving or picking up
% leaves that danger standing.
in_danger (see monster _).
in_danger (do move S) :- in_danger S.
in_danger (do pickup S) :- in_danger S.

% Otherwise (and we must spell this out, via negation as
% failure) you're safe.  Shooting restores safety outright.
safe start.
safe (see P S) :- not (in_danger (see P S)).
safe (do shoot S).
% Picking up and moving preserve safety (the danger cases
% were covered by the in_danger clauses above).
safe (do pickup S) :- safe S.
safe (do move S) :- safe S.

%---------------------------------------
% Specific fluents.
% wealth
%---------------------------------------
type have_wealth state -> int -> o.

% You have as much wealth as you've picked up jewels.
% The cut commits to the counting clause whenever the last
% step was picking up a visible jewel; any other see/do step
% leaves wealth unchanged.
have_wealth start 0.
have_wealth (do pickup (see jewel S)) W :-
    !,
    have_wealth S WB,
    W is WB + 1.
have_wealth (do _ (see _ S)) W :- have_wealth S W.
have_wealth (see P S) W :- have_wealth S W.

%---------------------------------------
% Specific fluents.
% boredom and excitement
%---------------------------------------
type bored state -> o.
type excited state -> o.

% If you see nothing and haven't moved, you're bored:
% seeing nothing starts the boredom, and shooting or
% picking up fail to relieve it.
bored (see nothing S).
bored (do shoot S) :- bored S.
bored (do pickup S) :- bored S.

% Otherwise (and we must spell this out, via negation as
% failure) you're excited.  Moving is exciting in itself.
excited start.
excited (see P S) :- not (bored (see P S)).
excited (do move S).
excited (do shoot S) :- excited S.
excited (do pickup S) :- excited S.

%---------------------------------------
% Desire
% Rules for what's good, in preference order: clause order
% is the preference order under depth-first search.
%---------------------------------------
type good state -> o.

% It's better to get out of danger.
good (do A S) :- in_danger S, safe (do A S).

% It's better to get rich (without becoming unsafe).
good (do A S) :-
    have_wealth S W,
    have_wealth (do A S) W2,
    safe (do A S),
    W2 > W.

% It's better not to be bored.
good (do A S) :- bored S, excited (do A S).

%---------------------------------------
% You want something if you think it would be good.
% This translates into the basic implementation
% for CHOOSE here.
% Compare with agent 2: for decision theory we could
% implement GOOD with an argmax over actions of
% the action's utility.
choose (see Percept State) Action :-
    good (do Action (see Percept State)).

%%%%%%

%---------------------------------------
% Implementation Four.
% An Agent with Logical Belief and Desire
% in an extension of the video game
% domain in which thinking ahead
% makes sense.
%---------------------------------------
module A4.

accumulate A3.
import HOM.

%----------------------------------------
% The new feature of the environment:
% you can find a package;
% you should unwrap it, because it might have
% something good inside.
type package percept.
type unwrap action.

%----------------------------------------
% To look ahead while avoiding infinite loops,
% you need this little bit of causal knowledge:
type possible_to_perceive state -> list percept -> o.

% You can't unwrap a package and find another package!
% (Without this restriction lookahead could chase an endless
% chain of packages inside packages; the cut keeps the
% catch-all clause below from also applying after unwrap.)
possible_to_perceive (do unwrap S) (monster :: jewel :: nothing :: nil) :- !.

% Otherwise, anything is possible.
possible_to_perceive _ (monster :: jewel :: nothing :: package :: nil).

%----------------------------------------
% Lookahead.
% A state is good if for each thing that you actually could
% perceive next, you can find an action that responds to it.
% MAPPRED binds AL to one responding action per possible
% percept; the list itself is discarded -- only its
% existence matters.
good S :-
    possible_to_perceive S PL,
    mappred (p\ a\ good (do a (see p S))) PL AL.