dm 'log;clear;output;clear;';
options ps=50 ls=70 pageno=1;
goptions reset=global border ftext=swiss gunit=cm htext=0.4 htitle=0.5;
goptions display noprompt;

/**********************************************************************
 * AUTHOR:  Chris Bilder
 * COURSE:  STAT 5063
 * DATE:    3-31-01
 * UPDATE:
 * PURPOSE: Find a logistic regression model for the placekicking
 *          data set
 *
 * NOTES: Below is a description of the variables:
 *   Date     - Date of the game (month/day/year)
 *   Week     - Week of the season
 *   Location - Location of the game using a two letter abbreviation
 *   Time     - Time the game started (hour and minutes)
 *   Type     - Type of stadium (O=outdoor, D=Dome)
 *   Field    - Type of field (G=grass, T=artificial turf)
 *   Temp     - Temperature at game time
 *   Humid    - Humidity at game time
 *   Dir      - Direction of the wind
 *   Speed    - Speed of the wind
 *   Cloud    - Sky conditions (SN=Sunny,...)
 *   Precip   - Is precipitation falling? (Y=yes, __=No information)
 *   Kicker   - Placekicker's name
 *   Team     - The team of the placekicker
 *   Opp      - The opposing team
 *   Good     - Was the placekick good? (Y=Yes, N=No)
 *   How      - If the placekick was no good, how was the placekick
 *              missed (WR=wide right,...)
 *   PAT      - Type of placekick (Y=PAT, N=field goal)
 *   Dist     - Distance of the placekick
 *   Qrtr     - Quarter of the game
 *   SC_team  - Score of the placekicker's team right before the
 *              placekick
 *   SC_opp   - Score of the opponent right before the placekick
 *   Win      - Did the placekicker's team win? (Y=Yes, N=No)
 *   Fin_team - Final score of the placekicker's team
 *   Fin_opp  - Final score of the opponent
 *   wind     - binary variable for wind speed > 15 mph at game time
 *              (wind=1) versus <=15 mph at game time (wind=0) - note
 *              that dome stadium placekicks receive a wind=0 value
 *   diff     - difference in score right before the placekick
 *   change   - Will a successful placekick cause a lead change?
 *              (change=1 if it will and 0 otherwise)
 *   elap30   - Time remaining in the half (0 for Overtime)
 *
 * All Oakland games removed since no information is available for
 * their home games
 **********************************************************************/

title1 'Chris Bilder, STAT 5063';

/*--------------------------------------------------------------------
 * prep_kicks: build the modeling variables from a raw placekick table.
 *   in=  input data set (raw placekick records)
 *   out= output data set holding only the modeling variables
 * The training and validation data go through this one macro so the
 * two sets are always coded identically.
 *-------------------------------------------------------------------*/
%macro prep_kicks(in=, out=);
  data &out;
    set &in;

    /* 0/1 indicator versions of the character variables */
    if PAT = "Y" then PAT1 = 1;
    else PAT1 = 0;
    if Type = "O" then type1 = 1;
    else type1 = 0;
    if Field = "G" then field1 = 1;
    else field1 = 0;
    if Good = "Y" then good1 = 1;
    else good1 = 0;

    /* change=1 when diff is in [-3,0] for a field goal or in [-1,0]
       for a PAT */
    change = 0;
    if diff >= -3 and diff <= 0 and PAT = 'N' then change = 1;
    if diff >= -1 and diff <= 0 and PAT = 'Y' then change = 1;

    /* wind stays missing when SPEED is missing (or negative) and the
       stadium is not a dome; dist_win is then missing as well */
    IF SPEED > 15 THEN WIND = 1;
    IF SPEED >= 0 AND SPEED <= 15 THEN WIND = 0;
    IF TYPE = 'D' THEN WIND = 0;

    dist_win = dist*wind;

    /* Disabled filter kept from the original program:
       if wind >= 0; */

    /* Remove these observations because of some large residuals -
       this limits the population of inference! */
    if PAT = "Y" and dist ne 20 then delete;

    keep good1 dist pat1 wind change type1 field1 Week elap30 dist_win;
  run;
%mend prep_kicks;

/* Model-building data set */
libname chris 'c:/chris/unl/stat873/chapter 1';
%prep_kicks(in=chris.placekick, out=set2)

/* Get validation data set */
libname chris3 'c:/chris/unl/stat873/chapter 7';
%prep_kicks(in=chris3.valid, out=valid1)

/* Four-variable model: fit, save ROC points and predictions, and score
   the validation data */
title2 'Logistic regression model with 4 variables';
proc logistic data=set2;
  model good1(event='1') = dist pat1 wind change
        / lackfit alpha=0.1 clodds=wald ctable
          pprob=0.1 0.2 0.3 0.4 0.45 0.5 0.6 0.7 0.8 0.9
          outroc=out_roc;
  units dist=1 10;
  output out=out_set1 predicted=pi_hat lower=low90 upper=up90
         predprobs=individual / alpha=0.10;
  score data=valid1 out=out_score fitstat;
run;

proc gplot data=out_roc;
  title2 'ROC Curve';
  plot _sensit_*_1mspec_ / vaxis=axis1 haxis=axis2 frame grid;
  axis1 label = (a=90 'Sensitivity') length = 12 order = (0 to 1 by .1);
  axis2 label = ('1-Specificity') length = 12 order = (0 to 1 by .1);
  symbol1 i=join v=none c=blue;
run;

data out_score;
  set out_score;
  /* Try 0.4 cutoff value.  A kick with no predicted probability is
     left unclassified (class4 missing) instead of being counted as a
     predicted miss; P_1 is expected to be missing when a predictor
     such as wind is missing. */
  if missing(P_1) then class4 = .;
  else if P_1 > 0.4 then class4 = 1;
  else class4 = 0;
run;

title2 'Summary of the classification results on the validation data set';
proc freq data=out_score;
  tables F_good1*I_good1 good1*class4 / nopercent nocol;
run;

/* EXPERIMENTAL for SAS 9.1: */
ods html;
ods graphics on;

title2 'Logistic regression model with 4 variables';
proc logistic data=set2;
  model good1(event='1') = dist pat1 wind change
        / lackfit alpha=0.1 clodds=wald ctable
          pprob=0.1 0.2 0.3 0.4 0.45 0.5 0.6 0.7 0.8 0.9
          outroc=out_roc;
run;

ods graphics off;
ods html close;

/**********************************************************************/

title2 'Logistic regression - Backward elimination';
proc logistic data=set2;
  model good1(event='1') = dist pat1 wind change type1 field1 Week elap30
        / details selection=backward slstay=0.15;
run;

title2 'Logistic regression - Find classification error rate';
proc logistic data=set2;
  model good1(event='1') = dist pat1 wind change type1 field1 Week elap30
        / ctable pprob=0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9;
run;

title2 'Logistic regression model in the Chance journal article';
proc logistic data=set2;
  model good1(event='1') = dist pat1 wind change dist_win
        / lackfit alpha=0.1 clodds=wald ctable
          pprob=0.1 0.2 0.3 0.4 0.45 0.5 0.6 0.7 0.8 0.9
          outroc=out_roc;
  output out=out_set1 predicted=pi_hat lower=low90 upper=up90
         predprobs=individual / alpha=0.10;
run;

/* EXPERIMENTAL for SAS 9.1: */
ods html;
ods graphics on;

/* This run is the backward elimination over all eight candidate
   variables, so it is titled as such rather than as the four-variable
   model. */
title2 'Logistic regression - Backward elimination';
proc logistic data=set2;
  model good1(event='1') = dist pat1 wind change type1 field1 Week elap30
        / details selection=backward slstay=0.15
          outroc=out_roc;
run;

ods graphics off;
ods html close;

/**********************************************************************/

/* Export data into ASCII text file */
proc export data=set2
  OUTFILE= "C:\chris\UNL\STAT873\Chapter 8\placekick.txt"
  DBMS=DLM REPLACE;
RUN;

/* Export validation data into ASCII text file */
proc export data=valid1
  OUTFILE= "C:\chris\UNL\STAT873\Chapter 8\placekick_valid.txt"
  DBMS=DLM REPLACE;
RUN;

quit;