NAIVE BAYES ALGORITHM WITH SQL & PL/SQL

articles: 

--NAIVE BAYES ALGORITHM WITH SQL & PL/SQL
---------------------------------------------------------------------------------
--Written JP Vijaykumar
--Date Jan 4th 2019

--This script is provided for educational purpose only.
--The readers are advised to make necessary changes as may be required for their use.

--I love data mining algorithms for their complexity and number crunching toughest logic.
--Academically, I want to generate Naive Bayes algorythm in pl/sql to predict the outcome.

-------------------------------------------------------
OUTLOOK | TEMPERATURE | HUMIDITY | WIND | PLAY_BALL |
-------------------------------------------------------
Rain | Cool | High | Strong | ? |
-------------------------------------------------------

--For these scripts, I used a 12c oracle on windows platform.
--I used gitbash console to access my 12c oracle database.

export ORACLE_SID=orcl
export ORACLE_HOME=c:/app/oracle/product/12.2.0/dbhome_1

$ORACLE_HOME/bin/sqlplus veeksha/saketh

alter session set container=orclpdb;
alter database orclpdb open;

create or replace directory dp_dir as 'c:/jpscripts';

drop table xtern_decision_jp;
create table xtern_decision_jp (
outlook varchar2(10),
temperature varchar2(10),
humidity varchar2(10),
wind varchar2(10),
play_ball varchar2(10))
organization external
( default directory dp_dir
access parameters
( records delimited by newline
fields terminated by '|'
)
location ('decisiontree_data.txt')
);

--From the training dataset, I will predict the outcome for the above test data.

select * from xtern_decision_jp;

OUTLOOK TEMPERATUR HUMIDITY WIND PLAY_BALL
---------- ---------- ---------- ---------- ----------
Sunny Hot High Weak No
Sunny Hot High Strong No
Overcast Hot High Weak Yes
Rain Mild High Weak Yes
Rain Cool Normal Weak Yes
Rain Cool Normal Strong No
Overcast Cool Normal Strong Yes
Sunny Mild High Weak No
Sunny Cool Normal Weak Yes
Rain Mild Normal Weak Yes
Sunny Mild Normal Strong Yes
Overcast Mild High Strong Yes
Overcast Hot Normal Weak Yes
Rain Mild High Strong No

14 rows selected.

with cnt as
(select count(*) val from xtern_decision_jp),
--YES VALUES CALCULATION
yp as
(select count(*) val from xtern_decision_jp where play_ball='Yes'),
yop as
(select count(*) val from xtern_decision_jp where outlook='Rain' and play_ball='Yes'),
ytp as
(select count(*) val from xtern_decision_jp where temperature='Cool' and play_ball='Yes'),
yhp as
(select count(*) val from xtern_decision_jp where humidity='High' and play_ball='Yes'),
ywp as
(select count(*) val from xtern_decision_jp where wind='Strong' and play_ball='Yes'),
y as
(select (yop.val/yp.val)*(ytp.val/yp.val)*(yhp.val/yp.val)*(ywp.val/yp.val)*
(yp.val/cnt.val) val from cnt,yp,yop,ytp,yhp,ywp),
--NO VALUES CALCULATION
np as
(select count(*) val from xtern_decision_jp where play_ball='No'),
nop as
(select count(*) val from xtern_decision_jp where outlook='Rain' and play_ball='No'),
ntp as
(select count(*) val from xtern_decision_jp where temperature='Cool' and play_ball='No'),
nhp as
(select count(*) val from xtern_decision_jp where humidity='High' and play_ball='No'),
nwp as
(select count(*) val from xtern_decision_jp where wind='Strong' and play_ball='No'),
n as
(select (nop.val/np.val)*(ntp.val/np.val)*(nhp.val/np.val)*(nwp.val/np.val)*
(np.val/cnt.val) val from cnt,np,nop,ntp,nhp,nwp)
select round(y.val/(y.val + n.val),6) yval, round(n.val/(y.val + n.val),6) nval from y,n;

YVAL NVAL
---------- ----------
.366569 .633431

set serverout on size 1000000 timing on
declare
v_table varchar2(30):='XTERN_DECISION_JP';
v_clsfr varchar2(30):='PLAY_BALL'; v_rslt1 varchar2(3) :='Yes'; v_rslt2 varchar2(3):='No';
v_colm1 varchar2(30):='OUTLOOK'; v_clvl1 varchar2(30):='Rain';
v_colm2 varchar2(30):='TEMPERATURE'; v_clvl2 varchar2(30):='Cool';
v_colm3 varchar2(30):='HUMIDITY'; v_clvl3 varchar2(30):='High';
v_colm4 varchar2(30):='WIND'; v_clvl4 varchar2(30):='Strong';
v_reslt varchar2(30):='';
v_ysval number(10,6);
v_noval number(10,6);
begin
/*
execute immediate 'select outlook,temperature,humidity,wind,play_ball from (select rownum rnum, a.* from (select * from xtern_decision_jp) a) b where rnum =(select count(*) from xtern_decision_jp)'
into v_clvl1,v_clvl2,v_clvl3,v_clvl4,v_reslt;
*/
execute immediate
'with
t as
(select * from '||v_table||'), -- where rownum< (select count(*) from '||v_table||')),
cnt as
(select count(*) val from t),
yp as
(select count(*) val from t where '||v_clsfr||'='''||v_rslt1||'''),
yop as
(select count(*) val from t where '||v_colm1||'='''||v_clvl1||''' and '||v_clsfr||'='''||v_rslt1||'''),
ytp as
(select count(*) val from t where '||v_colm2||'='''||v_clvl2||''' and '||v_clsfr||'='''||v_rslt1||'''),
yhp as
(select count(*) val from t where '||v_colm3||'='''||v_clvl3||''' and '||v_clsfr||'='''||v_rslt1||'''),
ywp as
(select count(*) val from t where '||v_colm4||'='''||v_clvl4||''' and '||v_clsfr||'='''||v_rslt1||'''),
y as
(select (yop.val/yp.val)*(ytp.val/yp.val)*(yhp.val/yp.val)*(ywp.val/yp.val)*
(yp.val/cnt.val) val from cnt,yp,yop,ytp,yhp,ywp),
np as
(select count(*) val from t where '||v_clsfr||'='''||v_rslt2||'''),
nop as
(select count(*) val from t where '||v_colm1||'='''||v_clvl1||''' and '||v_clsfr||'='''||v_rslt2||'''),
ntp as
(select count(*) val from t where '||v_colm2||'='''||v_clvl2||''' and '||v_clsfr||'='''||v_rslt2||'''),
nhp as
(select count(*) val from t where '||v_colm3||'='''||v_clvl3||''' and '||v_clsfr||'='''||v_rslt2||'''),
nwp as
(select count(*) val from t where '||v_colm4||'='''||v_clvl4||''' and '||v_clsfr||'='''||v_rslt2||'''),
n as
(select (nop.val/np.val)*(ntp.val/np.val)*(nhp.val/np.val)*(nwp.val/np.val)*
(np.val/cnt.val) val from cnt,np,nop,ntp,nhp,nwp)
select y.val/(y.val + n.val) , n.val/(y.val + n.val) from y,n' into v_ysval,v_noval;
dbms_output.put_line(v_clvl1||' '||v_clvl2||' '||v_clvl3||' '||v_clvl4||' '||v_reslt);
dbms_output.put_line('Yes: '||v_ysval||' No: '||v_noval);
exception
when others then
dbms_output.put_line(sqlerrm);
end;
/
Rain Cool High Strong
Yes: .366569 No: .633431

PL/SQL procedure successfully completed.

--USING NAIVE BAYES THEOREM IN PYTHON, VERIFIED THE RESULTS
No 0.6272
Yes 0.363

In both the above methods, the answer arrived at using Naive Bayes algorithm is a "NO".
Happy Scripting.