# The codes are for cross validatios of corporate exits. Smooth baseline hazard model. # Data sets. # Note that GVKEY = company ids, tindex =1 means the 1st quarter of 1979. #for sharcnet variables=read.table("/home/tbae/voldata/truc_com_cov.txt",header=TRUE) variables=variables[c(-2,-3)] exitdata=read.table("/home/tbae/voldata/exits.txt",header=TRUE) macroecono=read.table("/home/tbae/voldata/macroeco.txt",header=TRUE) loc=which((variables[,16] <= 31) | (variables[,16] >= 113)) variables=variables[-loc,] variables[,14]=variables[,14]*100 # for scaling to % ids=variables[,1] # company ids for all firms #selection of 8000 random sample gvkeys=as.numeric(levels(factor(ids))) sampled = sample(gvkeys,8000) sampledloc=numeric() for (i in sampled){ sampledloc=c(sampledloc,which(ids == i)) } sampled_variables=variables[sampledloc,] # to be used for estimation outsample_variables=variables[-sampledloc,] # to be used for prediction #exit data for sampled and non sampled firms sampled_exit_loc=numeric() for (i in sampled){ sampled_exit_loc=c(sampled_exit_loc,which(exitdata[,1] == i)) } sample_exit=exitdata[sampled_exit_loc,] outsample_exit=exitdata[-sampled_exit_loc,] # list of sampled variables by time points bysurv_sampled=by(sampled_variables,sampled_variables[,16],function(y){y}) # list of sampled exit data by reason of exit ybank_sampled=sample_exit[sample_exit[,4]==2 | sample_exit[,4]==3,] ymerg_sampled=sample_exit[sample_exit[,4]==1,] bybanked_sampled=by(ybank_sampled,ybank_sampled[,5],function(y){y}) bymerged_sampled=by(ymerg_sampled,ymerg_sampled[,5],function(y){y}) # list of out sample variables by time points bysurv_outsample=by(outsample_variables,outsample_variables[,16],function(y){y}) # list of outsample exit data by reason of exit ybank_outsample=outsample_exit[outsample_exit[,4]==2 | outsample_exit[,4]==3,] ymerg_outsample=outsample_exit[outsample_exit[,4]==1,] bybanked_outsample=by(ybank_outsample,ybank_outsample[,5],function(y){y}) bymerged_outsample=by(ymerg_outsample,ymerg_outsample[,5],function(y){y}) col_sums=function(matx){ if (is.vector(matx)){ matx=t(matrix(matx)) } if (dim(matx)[1]==0){ colsums=0 }else{ if (dim(matx)[1]==1){ colsums=matx }else{ colsums=colSums(matx) } } colsums } #estimation of baseline hazards given regression parameters and frailty parameters cor_baseline=function(bysurv,bybanked,bymerged,z,mle,arpara,ini,k,max.itr=1000){ q=k-32 phi1=arpara[1] theta1=arpara[2] phi2=arpara[3] theta2=arpara[4] sigma1=arpara[5] sigma2=arpara[6] rho=arpara[7] covmat=matrix(c(sigma1^2,rho*sigma1*sigma2,rho*sigma1*sigma2,sigma2^2),2,2) invcovmat=solve(covmat) phimat=matrix(c(phi1,0,0,phi2),2,2) thetavec=c(theta1,theta2) h=as.vector(ini) dist=1 j=0 while (dist > 1.0e-3){ j=j+1 if (j ==max.itr){ stop("max itr meets")} firstderiv=numeric() seconderiv=matrix(0,ncol=(2*q),nrow=(2*q)) for (r in 33:k){ locmacro=which(z[,12]==(r-1)) #macroeconomic variables macroeco=z[locmacro,] locbank=which(as.numeric(names(bybanked))==r) locmerg=which(as.numeric(names(bymerged))==r) dj1=ifelse(length(locbank)==0,0,dim(bybanked[[locbank]])[1]) dj2=ifelse(length(locmerg)==0,0,dim(bymerged[[locmerg]])[1]) #survivors locsurv=which(as.numeric(names(bysurv))==(r-1)) survivorfin=bysurv[[locsurv]] covresult=covariates(survivorfin, macroeco, mle) matsurv1=covresult[[1]] matsurv2=covresult[[2]] linear01=covresult[[3]] linear02=covresult[[4]] h01=exp(h[2*(r-33)+1]+linear01) h02=exp(h[2*(r-32)]+linear02) hdot=h01+h02 lamda01=h01/(1+hdot) lamda02=h02/(1+hdot) if (r==33){ gammaprime=-t(matrix(h[(2*(r-33)+1):(2*(r-32))]-thetavec))%*%invcovmat-t(matrix(h[(2*(r-32)+1):(2*(r-31))]-h[(2*(r-33)+1):(2*(r-32))]-phimat%*%matrix(thetavec-h[(2*(r-33)+1):(2*(r-32))])))%*%invcovmat%*%(phimat-diag(c(1,1))) }else{ if (r==k){ gammaprime=-t(matrix(h[(2*(r-33)+1):(2*(r-32))]-h[(2*(r-34)+1):(2*(r-33))]-phimat%*%matrix(thetavec-h[(2*(r-34)+1):(2*(r-33))])))%*%invcovmat }else{ gammaprime=-t(matrix(h[(2*(r-33)+1):(2*(r-32))]-h[(2*(r-34)+1):(2*(r-33))]-phimat%*%matrix(thetavec-h[(2*(r-34)+1):(2*(r-33))])))%*%invcovmat-t(matrix(h[(2*(r-32)+1):(2*(r-31))]-h[(2*(r-33)+1):(2*(r-32))]-phimat%*%matrix(thetavec-h[(2*(r-33)+1):(2*(r-32))])))%*%invcovmat%*%(phimat-diag(c(1,1))) } } firstderiv=c(firstderiv,gammaprime+c(dj1-sum(lamda01),dj2-sum(lamda02))) if (r==k){ gammadouble=-invcovmat }else{ gammadouble=-invcovmat-t(phimat-diag(c(1,1)))%*%invcovmat%*%(phimat-diag(c(1,1))) } seconderiv[(2*(r-33)+1):(2*(r-32)),(2*(r-33)+1):(2*(r-32))]=gammadouble+matrix(c(-sum(lamda01-lamda01^2),sum(lamda01*lamda02),sum(lamda02*lamda01),-sum(lamda02-lamda02^2)),2,2) # serial correlation part if (r < k){ seconderiv[(2*(r-33)+1):(2*(r-32)),(2*(r-32)+1):(2*(r-31))]=-t(phimat-diag(c(1,1)))%*%invcovmat seconderiv[(2*(r-32)+1):(2*(r-31)),(2*(r-33)+1):(2*(r-32))]=-t(phimat-diag(c(1,1)))%*%invcovmat } } #first derivative newh=matrix(h)-qr.solve(seconderiv)%*%matrix(firstderiv) dist=sqrt(sum((newh-matrix(h))^2)) h=t(newh) } list(series=h,score=firstderiv,fisher=-seconderiv) } paras=function(inibase,para,sigma,tind){ nbase=tind-32 base=matrix(inibase,2,nbase) lng=dim(base)[2] phi=c(para[1],para[3]) theta=c(para[2],para[4]) firstterm=t(matrix(base[,1]-theta))%*%solve(sigma)%*%matrix(base[,1]-theta) secondterm=0 for (i in 2:lng){ secondterm=secondterm+t(matrix(base[,i]-base[,i-1]-diag(phi)%*%(theta-base[,i-1])))%*%solve(sigma)%*%matrix(base[,i]-base[,i-1]-diag(phi)%*%(theta-base[,i-1])) } firstterm+secondterm } #regression parameter estimation given baseline and frailty parameters reg_para_frailty=function(x,y1,y2,z,regpara,baseline,arpara,tind){ u=length(regpara) phi1=arpara[1] phi2=arpara[3] theta1=arpara[2] theta2=arpara[4] sigma1=arpara[5] sigma2=arpara[6] rho=arpara[7] phimat=matrix(c(phi1,0,0,phi2),ncol=2) covmat=rbind(c(sigma1^2,rho*sigma1*sigma2),c(rho*sigma1*sigma2,sigma2^2)) thetavec=c(theta1,theta2) invcovmat=qr.solve(covmat) nbase=tind-32 newbase=matrix(baseline,2,nbase) distance = 1 while (distance > 1.0e-3){ Fisher=matrix(0,ncol=u,nrow=u) score=rep(0,u) loglikelihood=0 for (kk in 33:tind){ base=newbase[,kk-32] locmacro=which(z[,12]==(kk-1)) #macroeconomic variables macroeco=z[locmacro,] #survivors locsurv=which(as.numeric(names(x))==(kk-1)) survivorfin=x[[locsurv]] l=dim(survivorfin)[1] #exited locbank=which(as.numeric(names(y1))== kk) locmerg=which(as.numeric(names(y2))== kk) dj1=ifelse(length(locbank)==0,0,dim(y1[[locbank]])[1]) dj2=ifelse(length(locmerg)==0,0,dim(y2[[locmerg]])[1]) dj=dj1+dj2 if (dj > 0){ bankfin=ifelse(dj1 > 0,y1[[locbank]],numeric()) mergfin=ifelse(dj2 > 0,y2[[locmerg]],numeric()) exitcode=c(bankfin[[1]],mergfin[[1]]) survcode=survivorfin[,1] loc=numeric() for (key in exitcode){ loc=c(loc, which(survcode==key)) } exitcov=survivorfin[loc,] nonexitcov=survivorfin[-loc,] survivorfin=rbind(exitcov,nonexitcov) } #survivors ressurv=covariates(survivorfin, macroeco, regpara) matsurv1=ressurv[[1]] matsurv2=ressurv[[2]] linear01=ressurv[[3]] linear02=ressurv[[4]] if (dj >0){ matexit1=matsurv1[1:dj,] matexit2=matsurv2[1:dj,] linear11=linear01[1:dj] linear12=linear02[1:dj] if (dj1==0){ loglike1=sum(linear12[1:dj]+base[2]) score1=col_sums(matexit2[1:dj,]) } if (dj2==0){ loglike1=sum(linear11[1:dj1]+base[1]) score1=col_sums(matexit1[1:dj1,]) } if (dj1 >0 & dj2 > 0){ loglike1=sum(linear11[1:dj1]+base[1])+sum(linear12[(dj1+1):dj]+base[2]) score1=col_sums(matexit1[1:dj1,])+col_sums(matexit2[(dj1+1):dj,]) } }else{ loglike1=0 score1=rep(0,u) } h01=exp(base[1]+linear01) h02=exp(base[2]+linear02) hdot=h01+h02 loglike2=sum(log(1+hdot)) lamda01=h01/(1+hdot) lamda02=h02/(1+hdot) #lincommat=lamda01*as.data.frame(matsurv1)+lamda02*as.data.frame(matsurv2) #SS1=as.vector(colSums(lincommat)) #SS2= matrix(rowSums(apply(sqrt(lamda01)*as.data.frame(matsurv1), MARGIN=1,function(y){y%*%t(y)})),nrow=u)+ # + matrix(rowSums(apply(sqrt(lamda02)*as.data.frame(matsurv2), MARGIN=1,function(y){y%*%t(y)})),nrow=u)+ # - matrix(rowSums(apply(as.data.frame(lincommat), MARGIN=1,function(y){y%*%t(y)})),nrow=u) SS1=rep(0,u) SS2=matrix(0,ncol=u,nrow=u) for (w in 1:l){ ss1=lamda01[w]*matsurv1[w,]+lamda02[w]*matsurv2[w,] ss2=lamda01[w]*matrix(matsurv1[w,])%*%t(matrix(matsurv1[w,]))+lamda02[w]*matrix(matsurv2[w,])%*%t(matrix(matsurv2[w,])) SS1=SS1+ss1 SS2=SS2+ss2-matrix(ss1)%*%t(matrix(ss1)) } #smoothness penalty if (kk==33){ penalty=log(2*pi)+0.5*log(det(covmat))+0.5*(base-thetavec)%*%invcovmat%*%matrix((base-thetavec)) }else{ penalty=log(2*pi)+0.5*log(det(covmat))+0.5*(base-newbase[,kk-33]-t(phimat%*%matrix((thetavec-newbase[,kk-33]))))%*%invcovmat%*%(matrix(base-newbase[,kk-33])-phimat%*%matrix((thetavec-newbase[,kk-33]))) } loglikelihood=loglikelihood+loglike1-loglike2-penalty score=score+score1-SS1 Fisher=Fisher+SS2 snderiv=-Fisher } #looping from kk=33 to nbase newone= matrix(regpara) - qr.solve(snderiv)%*%matrix(score) distance=sqrt(sum( (t(newone)-regpara)^2) ) regpara=t(newone) } list(f=loglikelihood,g=score,H=Fisher,regress=regpara) } itr_newton_frailty=function(x,y1,y2,z,mle,arpara,tind){ nbase=tind-32 ini_reg=mle ini_arpara=arpara ini_base=as.vector(rbind(rep(arpara[2],nbase),rep(arpara[4],nbase))) howfar=1 while(howfar >= 1.0e-4){ phi1=ini_arpara[1] phi2=ini_arpara[3] theta1=ini_arpara[2] theta2=ini_arpara[4] sigma1=ini_arpara[5] sigma2=ini_arpara[6] rho=ini_arpara[7] covmat=rbind(c(sigma1^2,rho*sigma1*sigma2),c(rho*sigma1*sigma2,sigma2^2)) invcovmat=qr.solve(covmat) out.baseline=cor_baseline(x,y1,y2,z,ini_reg,ini_arpara,ini_base,tind) base_est=out.baseline$series arpara_est=c(optim(par=ini_arpara[1:4],paras,inibase=base_est,sigma=covmat,tind=tind)[[1]],ini_arpara[5:7]) out.regress=reg_para_frailty(x,y1,y2,z,ini_reg,base_est,arpara_est,tind) regpara_est=out.regress$regress howfar=sqrt(sum((regpara_est-ini_reg)^2)) ini_reg=regpara_est ini_base=base_est ini_arpara=arpara_est } list(base_fisher_diag=diag(out.baseline$fisher),reg_loglikelihood=out.regress$f,reg_score=out.regress$g,reg_fisher=out.regress$H,baseline_est=matrix(base_est,2,nbase),regress_est=regpara_est,arpara_est=arpara_est) } #sigma1=0.5, sigma2=0.5 covariates=function(x,z,para){ dj=dim(x)[1] v11=x[,3]#QR 1 v12=x[,4] #CR 2 v13=x[,5] #CL 3 v14=x[,6] #IBIT 4 v15=x[,7] #TL 5 v16=x[,8] #SALE 6 v17=x[,9] #WC 7 v18=x[,10] #NI 8 v19=x[,11] #RE 9 v20=x[,12] #DTD 10 v21=x[,13] #AGE 11 v22=x[,14] #RTN 12 v23=x[,15] #STD 13 v24=(x[,2]==1)#ind1 14 v25=(x[,2]==2)#ind2 15 v26=(x[,2]==3) #ind3 16 v27=(x[,2]==4) #ind4 17 v28=rep(z[,1],dj) #indexrtn 18 v29=rep(z[,2],dj) #30yrbond 19 v30=rep(z[,3],dj) #10yrnote 20 v31=rep(z[,4],dj) #5yrnote 21 v32=rep(z[,5],dj) #1yrnote 22 v33=rep(z[,6],dj) #90daybill 23 v34=rep(z[,7],dj) #30daybill 24 v35=rep(z[,8],dj) #CPI 25 v36=rep(z[,9],dj) #ex inx 26 v37=rep(z[,10],dj) #unemp 27 v38=rep(z[,11],dj) #gdpgrt 28 liquid = 0.7*v11-0.09*v12+0.4*v17 profit = 2.8*v14-1.6*v18-0.04*v19 debts = 0.58*v13+0.49*v15 DTD = v20 AGE = v21 RTN = v22 SD = v23 Ind1=v24 Ind2=v25 vwrtn = v28 longbond = v29 midbonds = 0.3*v30-0.5*v31 shortbonds = 0.44*v33-0.4*v34 ex = v36 unem = v37 gdp = v38 mat1=cbind(liquid,profit,debts,DTD,AGE,RTN,SD,longbond,midbonds,unem, matrix(0,ncol=8,nrow=dj)) mat2=cbind(matrix(0,ncol=10,nrow=dj), liquid,profit,AGE,RTN,SD,Ind1,Ind2,unem) linear1=rowSums(t(matrix(rep(para,dj),ncol=dj))*mat1) linear2=rowSums(t(matrix(rep(para,dj),ncol=dj))*mat2) list(mat1,mat2,linear1,linear2) } mle=c(-0.84958815,-0.79395884,1.06760549,-0.51133536,0.26737205,-0.43734638,9.93639798,0.05927794,1.04982973,0.29241875,+ 0.46367106,-0.84166321,0.07405145,1.07406696,-11.49174515,-0.1615976,-0.22951649,-0.48894765) arpara=c(1.0,-8,0.5,-2,0.5,0.5,0) out.newton=itr_newton_frailty(bysurv_sampled,bybanked_sampled,bymerged_sampled,macroecono,mle,arpara,112) # in sample tests revert_pred_insample=function(x,y1,y2,z,mle,base,threshold,k){ u=length(mle) h=as.numeric(base) locmacro=which(z[,12]==(k-1))#macroeconomic variables macroeco=z[locmacro,] #survivors locsurv=which(as.numeric(names(x))==(k-1)) survivorfin=x[[locsurv]] l=dim(survivorfin)[1] #exited locbank=which(as.numeric(names(y1))==k) locmerg=which(as.numeric(names(y2))==k) if (length(locbank)==0){ dj1=0 banked=0000 }else{ banked=y1[[locbank]][,1] dj1=length(banked) } if (length(locmerg)==0){ dj2=0 merged=0000 }else{ merged=y2[[locmerg]][,1] dj2=length(merged) } #survivors covresult=covariates(survivorfin, macroeco, mle) matsurv1=covresult[[1]] matsurv2=covresult[[2]] linear01=covresult[[3]] linear02=covresult[[4]] h01=exp(h[1]+linear01) h02=exp(h[2]+linear02) hdot=h01+h02 lamda01=h01/(1+h01+h02) lamda02=h02/(1+h01+h02) #first term of taylor approximation sup01=max(lamda01) sup02=max(lamda02) result1=c(k, l, dj1, sum(lamda01), dj2, sum(lamda02)) #shannon's entrophy shannon1=0 mj1=0 Vj1=0 for (w in 1:l){ if (any(banked==survivorfin[w,1])){ shannon1=shannon1+log(lamda01[w]) }else{ if (any(merged==survivorfin[w,1])){ shannon1=shannon1+log(lamda02[w]) }else{ shannon1=shannon1+log(1-lamda01[w]-lamda02[w]) } } mij1=lamda01[w]*log(lamda01[w])+lamda02[w]*log(lamda02[w])+(1-lamda01[w]-lamda02[w])*log(1-lamda01[w]-lamda02[w]) mj1=mj1+mij1 Vj1=Vj1+lamda01[w]*(log(lamda01[w]))^2+lamda02[w]*(log(lamda02[w]))^2+(1-lamda01[w]-lamda02[w])*(log(1-lamda01[w]-lamda02[w]))^2-(mij1)^2 } zscore=(shannon1-mj1)/sqrt(Vj1) #simulation simnum=5000 highcom=which(lamda01+lamda02 >= threshold) highlamda01=lamda01[highcom] highlamda02=lamda02[highcom] lowsumlamda01=sum(lamda01)-sum(highlamda01) lowsumlamda02=sum(lamda02)-sum(highlamda02) nhighcom=length(highcom) if (nhighcom==0){ sumcol1=rep(0,simnum) sumcol2=rep(0,simnum) }else{ resultmat1=matrix(0,nrow=nhighcom,ncol=simnum) resultmat2=matrix(0,nrow=nhighcom,ncol=simnum) Pmat1=matrix(rep(highlamda01,simnum),ncol=simnum) Pmat2=matrix(rep(highlamda02,simnum),ncol=simnum) Rmat=matrix(runif(nhighcom*simnum),ncol=simnum) resultmat1[which(Rmat <= Pmat1)]=1 resultmat2[which(Rmat > Pmat1 & Rmat <= (Pmat1+Pmat2))]=1 sumcol1=colSums(resultmat1) sumcol2=colSums(resultmat2) } poisim1=rpois(simnum,lowsumlamda01) poisim2=rpois(simnum,lowsumlamda02) lower1=quantile((poisim1+sumcol1),0.025) lower2=quantile((poisim2+sumcol2),0.025) upper1=quantile((poisim1+sumcol1),0.975) upper2=quantile((poisim2+sumcol2),0.975) predint=c(lower1,upper1,lower2,upper2) list(result1=result1, zscore=zscore, predint=predint) } regress=out.newton$regress_est baseline=t(out.newton$baseline_est) fitted=matrix(0,nrow=6,ncol=80) entropy=rep(0,80) predint=matrix(0,nrow=4,ncol=80) for (w in 1:80){ out.pred=revert_pred_insample(bysurv_outsample,bybanked_outsample,bymerged_outsample,macroecono,regress,baseline[w,],0.15,32+w); fitted[,w]=matrix(out.pred$result1) entropy[w]=out.pred$zscore predint[,w]=matrix(out.pred$predint) } matrix(regress) matrix(baseline) matrix(entropy)