# The codes are for corporate credit rating migration under orded logit model with frailty. 4 categories. # Data sets. # Note that GVKEY = company ids, tindex =1 means the 1st quarter of 1979. #variables=read.table("/home/tbae/voldata/rating_com_cov.txt",header=TRUE) #variables=variables[-2] #macroecono=read.table("/home/tbae/voldata/macroeco.txt",header=TRUE) variables=read.table("K:/Merged/rating_com_cov.txt",header=TRUE) variables=variables[-2] macroecono=read.table("K:/Merged/macroeco.txt",header=TRUE) variables[,16]=100*variables[,16] loc=which(variables[,2]==0 | variables[,3]==0) variables=variables[-loc,] loc11=which(variables[,2]==8) # state 8 is an abosrbing state variables=variables[-loc11,] locdelmac=which(macroecono[,12] <= 27 | macroecono[,12] >= 112) macroecono=macroecono[-locdelmac,] locdel=which(variables[,18] <= 27 | variables[,18] >= 112) variables=variables[-locdel,] #trunctation 1percentile and 99percentile for (j in 5:17){ loc1=which(variables[,j] <= quantile(variables[,j],0.01)) loc2=which(variables[,j] >= quantile(variables[,j],0.99)) variables[loc1,j]= quantile(variables[,j],0.01) variables[loc2,j]= quantile(variables[,j],0.99) } loc12=which(variables[,2] <= 3) variables[loc12,2]=1 loc13=which((variables[,2] > 3) & (variables[,2] <= 6)) variables[loc13,2]=2 loc14=which(variables[,2] == 7) variables[loc14,2]=3 loc22=which(variables[,3] <= 3) variables[loc22,3]=1 loc23=which((variables[,3] > 3) & (variables[,3] <= 6)) variables[loc23,3]=2 loc24=which(variables[,3] == 7) variables[loc24,3]=3 loc25=which(variables[,3] == 8) variables[loc25,3]=4 #for sharcnet #variables=read.table("/home/tbae/voldata/ratingcov.txt",header=TRUE) #variables=variables[-2] #macroecono=read.table("/home/tbae/voldata/macroeco.txt",header=TRUE) byratings=by(variables,variables[,c(3,2)],function(y){y}) #we consider rating data available from December 1985. That is, qtr=28 #rating==0 means no rating. consider rating 1 to 8. col_sums=function(matx){ if (is.vector(matx)){ matx=t(matrix(matx)) } if (dim(matx)[1]==0){ colsums=0 }else{ if (dim(matx)[1]==1){ colsums=matx }else{ colsums=colSums(matx) } } colsums } Gcdf=function(x){ cumul=rep(0,length(x)) loc1=which(x > 500) loc2=which(x <= 500) cumul[loc1]=1 cumul[loc2]=exp(x[loc2])/(1+exp(x[loc2])) cumul exp(x)/(1+exp(x)) } Gprime=function(y){ Gcdf(y)*(1-Gcdf(y)) } Gtwoprime=function(y){ Gcdf(y)*(1-Gcdf(y))*(1-2*Gcdf(y)) } loglike_frailty=function(x,z,para,serials,arpara,tind){ #c=(c1,c2,c3,...,c7) gamma=c(gamma1,...,gamma7) for identifiability set c1=0, and sigma1=1 u=length(para) threshold=c(0,para[1], sum(para[1:2])) gamma=c(para[3],sum(para[3:4]), sum(para[3:5])) sigma=c(1,para[6:7]) beta=c(para[8:u]) alpha=arpara[1] mu = arpara[2] smooth =arpara[3] u1=length(threshold) u2=length(gamma) u3=length(sigma) u4=length(beta) loglikelihood=0 #regression parameter estiamtion for (tt in 29:tind){ locmacro=which(z[,12]==(tt-1)) #macroeconomic variables macroeco=z[locmacro,] for (kk in 1:12){ locsurv=which(x[[kk]][,18]==(tt-1)) dkl=length(locsurv) if (dkl==0){ next } covariates=x[[kk]][locsurv,] states=x[[kk]][1,2] nextstates=x[[kk]][1,3] # moved transitions at tt linearmat=firm_cov(covariates,macroeco,beta) if(nextstates < 4){ ESkl=1/sigma[states]*(threshold[nextstates]-gamma[states] - linearmat[[2]]-serials[tt-28]) } if (nextstates > 1){ ESkl1=1/sigma[states]*(threshold[nextstates-1]-gamma[states] - linearmat[[2]]-serials[tt-28]) } if (nextstates == 4){ phi_kl= 1-Gcdf(ESkl1) ithloglike=-log(1+exp(ESkl1)) }else{ if (nextstates == 1){ phi_kl= Gcdf(ESkl) ithloglike=ESkl - log(1+exp(ESkl)) }else{ phi_kl=Gcdf(ESkl)-Gcdf(ESkl1) ithloglike=log(exp(ESkl)-exp(ESkl1)) -log(1+exp(ESkl))-log(1+exp(ESkl1)) } } loglikelihood=loglikelihood+sum(ithloglike) } if (tt == 29){ penalty = 0.5*log(2*pi)+log(smooth)+0.5*(serials[tt-28]-mu)^2/smooth^2 }else{ penalty = 0.5*log(2*pi)+log(smooth)+0.5*(serials[tt-28]-serials[tt-29]-alpha*(mu-serials[tt-29]))^2/smooth^2 } loglikelihood = loglikelihood - penalty } print(loglikelihood) -loglikelihood } serialdepend=function(x,z,para,arpara,ini_frail,tind){ #c=(c1,c2,c3,...,c7) gamma=c(gamma1,...,gamma7) for identifiability set c1=0, and sigma1=1 u=length(para) threshold=c(0,para[1], sum(para[1:2])) gamma=c(para[3],sum(para[3:4]), sum(para[3:5])) sigma=c(1,para[6:7]) beta=c(para[8:u]) alpha=arpara[1] mu = arpara[2] smooth =arpara[3] u1=length(threshold) u2=length(gamma) u3=length(sigma) u4=length(beta) nperiod=tind-28 serials = ini_frail dist = 1 while (dist >= 1.0e-3){ scorez = rep(0,nperiod) fisherz = matrix(0,nperiod,nperiod) for (tt in 29:tind){ locmacro=which(z[,12]==(tt-1)) #macroeconomic variables macroeco=z[locmacro,] pointprime = 0 pointdoubleprime = 0 for (kk in 1:12){ locsurv=which(x[[kk]][,18]==(tt-1)) dkl=length(locsurv) if (dkl==0){ next } covariates=x[[kk]][locsurv,] states=x[[kk]][1,2] nextstates=x[[kk]][1,3] # moved transitions at tt linearmat=firm_cov(covariates,macroeco,beta) if(nextstates < 4){ ESkl=1/sigma[states]*(threshold[nextstates]-gamma[states] - linearmat[[2]]-serials[tt-28]) } if (nextstates > 1){ ESkl1=1/sigma[states]*(threshold[nextstates-1]-gamma[states] - linearmat[[2]]-serials[tt-28]) } if (nextstates == 4){ pointprime= pointprime + sum(Gcdf(ESkl1)/sigma[states]) pointdoubleprime= pointdoubleprime + sum(((Gcdf(ESkl1)-1)*Gcdf(ESkl1))/sigma[states]^2) }else{ if (nextstates == 1){ pointprime= pointprime + sum((Gcdf(ESkl)-1)/sigma[states]) pointdoubleprime= pointdoubleprime + sum(((Gcdf(ESkl)-1)*Gcdf(ESkl))/sigma[states]^2) }else{ pointprime= pointprime + sum((Gcdf(ESkl)+Gcdf(ESkl1)-1)/sigma[states]) pointdoubleprime= pointdoubleprime + sum(((Gcdf(ESkl)-1)*Gcdf(ESkl)+(Gcdf(ESkl1)-1)*Gcdf(ESkl1))/sigma[states]^2) } } } if ( tt==29){ scorez[tt-28]=pointprime -1/smooth^2*(serials[tt-28]-mu+(alpha-1)*(serials[tt-27]-serials[tt-28]-alpha*(mu-serials[tt-28]))) fisherz[tt-28,tt-28]=pointdoubleprime-1/smooth^2*(1+(alpha-1)^2) fisherz[tt-28,tt-27]=-1/smooth^2*(alpha-1) fisherz[tt-27,tt-28]=-1/smooth^2*(alpha-1) } if ( tt==tind){ scorez[tt-28]=pointprime -1/smooth^2*(serials[tt-28]-serials[tt-29]-alpha*(mu-serials[tt-29])) fisherz[tt-28,tt-28]=pointdoubleprime-1/smooth^2 } if ( (tt > 29) & (tt < tind) ){ scorez[tt-28]=pointprime -1/smooth^2*((serials[tt-28]-serials[tt-29]-alpha*(mu-serials[tt-29]))+(alpha-1)*(serials[tt-27]-serials[tt-28]-alpha*(mu-serials[tt-28]))) fisherz[tt-28,tt-28]=pointdoubleprime-1/smooth^2*(1+(alpha-1)^2) fisherz[tt-28,tt-27]=-1/smooth^2*(alpha-1) fisherz[tt-27,tt-28]=-1/smooth^2*(alpha-1) } } newserials=matrix(serials)-qr.solve(fisherz)%*%matrix(scorez) dist=sqrt(sum((newserials-matrix(serials))^2)) print(dist) serials =newserials } list(serials=serials, score=scorez, fisher=-fisherz) } arparas=function(alphamu,serials,tind){ nbase=tind-28 alpha=alphamu[1] mu=alphamu[2] firstterm=(serials[1]-mu)^2 secondterm=0 for (i in 2:nbase){ secondterm=secondterm+(serials[i]-serials[i-1]-alpha*(mu-serials[i-1]))^2 } firstterm+secondterm } itr_rating_frailty=function(x,z,para,arpara,tind){ u=length(para) threshold=c(0,para[1], sum(para[1:2])) gamma=c(para[3],sum(para[3:4]), sum(para[3:5])) sigma=c(1,para[6:7]) beta=c(para[8:u]) u1=length(threshold) u2=length(gamma) u3=length(sigma) u4=length(beta) nbase=tind-28 ini_reg=para ini_arpara=arpara ini_frail=rep(ini_arpara[2],nbase) howfar=1 while(howfar >= 1.0e-4){ out.frailties=serialdepend(x,z,ini_reg,ini_arpara,ini_frail,tind) frail_est=out.frailties$serials print(frail_est) arpara_est=c(optim(par=ini_arpara[1:2],arparas,serials=frail_est,tind=tind)[[1]],ini_arpara[3]) print(arpara_est) out.regress=optim(par=ini_reg,fn=loglike_frailty, method="L-BFGS-B", control=list(trace=6,maxit=500),low=c(rep(0.01,2),-Inf,rep(0.01,2),rep(0.01,2),rep(-Inf,u4)), x=x,z=z,serials=frail_est, arpara=arpara_est, tind=tind) if (out.regress$convergence > 0){ print("optimization failed to converge") break } regpara_est=out.regress$par print(regpara_est) howfar=sqrt(sum((regpara_est-ini_reg)^2)) print(howfar) ini_reg=regpara_est ini_frail=frail_est ini_arpara=arpara_est } list(frail_fisher_diag=diag(out.frailties$fisher),reg_loglikelihood=out.regress$value,frailty_est=frail_est,regress_est=regpara_est,arpara_est=arpara_est) } #sigma = 0.1 firm_cov=function(x,z,newregress){ dj=dim(x)[1] v11=x[,5]#QR 2 v12=x[,6] #CR 3 v13=x[,7] #CL 4 v14=x[,8] #IBIT 5 v15=x[,9] #TL 6 v16=x[,10] #SALE 7 v17=x[,11] #WC 8 v18=x[,12] #NI 9 v19=x[,13] #RE 10 v20=x[,14] #DTD 11 v21=x[,15] #AGE 12 v22=x[,16] #RTN 13 v23=x[,17] #STD 14 v24=(x[,4]==1)#ind1 15 v25=(x[,4]==2)#ind2 16 v26=(x[,4]==3) #ind3 17 v27=(x[,4]==4) #ind4 18 v28=rep(z[,1],dj) #indexrtn 19 v29=rep(z[,2],dj) #30yrbond 20 v30=rep(z[,3],dj) #10yrnote 21 v31=rep(z[,4],dj) #5yrnote 22 v32=rep(z[,5],dj) #1yrnote 23 v33=rep(z[,6],dj) #90daybill 24 v34=rep(z[,7],dj) #30daybill 25 v35=rep(z[,8],dj) #CPI 26 v36=rep(z[,9],dj) #ex inx 27 v37=rep(z[,10],dj) #unemp 28 v38=rep(z[,11],dj) #gdpgrt 29 liquid = -0.13*v12+1.56*v17 profit = v14 debt = -1.47*v13+0.37*v15 RTN = v22 DTD = v20 STD = v23 IND2 = v25 IND3 = v26 B90 = v33 CPI =v35 mat=cbind(liquid,profit,debt,RTN,DTD,STD,IND2,IND3,CPI) if (dj==1){ mat=as.matrix(mat) } linear=rowSums(t(matrix(rep(newregress,dj),ncol=dj))*mat) list(mat,linear) } starting=c(13.6195,6.8093,-1.0209,9.9141,7.4340,1.0873,0.7107,+ -0.9905,-6.2286,0.9624,-0.4531,-0.4501,23.4936,-0.2747,0.4417,-0.1159) arpara_start=c(0.0019,-0.4287,0.1000) #mle=out.frailty$regress_est #arpara=out.frailty$arpara_est out.frailty=itr_rating_frailty(x=byratings,z=macroecono,para=starting, arpara=arpara_start,tind=112) out.frailty # sigma >= 0.5 firm_cov=function(x,z,newregress){ dj=dim(x)[1] v11=x[,5]#QR 2 v12=x[,6] #CR 3 v13=x[,7] #CL 4 v14=x[,8] #IBIT 5 v15=x[,9] #TL 6 v16=x[,10] #SALE 7 v17=x[,11] #WC 8 v18=x[,12] #NI 9 v19=x[,13] #RE 10 v20=x[,14] #DTD 11 v21=x[,15] #AGE 12 v22=x[,16] #RTN 13 v23=x[,17] #STD 14 v24=(x[,4]==1)#ind1 15 v25=(x[,4]==2)#ind2 16 v26=(x[,4]==3) #ind3 17 v27=(x[,4]==4) #ind4 18 v28=rep(z[,1],dj) #indexrtn 19 v29=rep(z[,2],dj) #30yrbond 20 v30=rep(z[,3],dj) #10yrnote 21 v31=rep(z[,4],dj) #5yrnote 22 v32=rep(z[,5],dj) #1yrnote 23 v33=rep(z[,6],dj) #90daybill 24 v34=rep(z[,7],dj) #30daybill 25 v35=rep(z[,8],dj) #CPI 26 v36=rep(z[,9],dj) #ex inx 27 v37=rep(z[,10],dj) #unemp 28 v38=rep(z[,11],dj) #gdpgrt 29 liquid = -0.13*v12+1.56*v17 profit = v14 debt = -1.47*v13+0.37*v15 RTN=v22 DTD = v20 STD = v23 IND2 = v25 IND3 = v26 B90 = v33 CPI =v35 mat=cbind(liquid,profit,debt,RTN,DTD,STD,IND2,IND3) if (dj==1){ mat=as.matrix(mat) } linear=rowSums(t(matrix(rep(newregress,dj),ncol=dj))*mat) list(mat,linear) } starting=c(13.3253,6.7003,-1.0935,9.8002,7.2956,1.0579,0.6934,+ -0.9668,-6.0242,0.9526,-0.4583,-0.4440,23.0607,-0.2717,0.4214) arpara_start=c(0.2209,-1.0258,0.5) starting=c(13.7914,6.9500,-1.0798,10.0302,7.5728,1.0942,0.7158,+ -0.9897,-6.1837,0.9871,-0.4846,-0.4531,24.1043,-0.2819,0.4199) arpara_start=c(0.4459,-1.1257,1.0) starting=c(14.1059,7.1209,-1.0645,10.1875,7.7623,1.1189,0.7327,+ -1.0071,-6.3032,1.0096,-0.5037,-0.4600,24.6628,-0.2902,0.4166) arpara_start=c(0.5381,-1.1399,5.0) starting=c(14.1059,7.1209,-1.0645,10.1875,7.7623,1.1189,0.7327,+ -1.0071,-6.3032,1.0096,-0.5037,-0.4600,24.6628,-0.2902,0.4166) arpara_start=c(0.5381,-1.1399,10.0) starting=c(14.1446,7.1428,-1.0598,10.2074,7.7844,1.1220,0.7351,+ -1.0128,-6.3775,1.0110,-0.5048,-0.4609,24.6620,-0.2918,0.4168) arpara_start=c(0.5416,-1.1402,20.0) out.frailty=itr_rating_frailty(x=byratings,z=macroecono,para=starting, arpara=arpara_start,tind=112) out.frailty # in sample prediction tests rating_frailty_insample=function(x,z,para,serials,k){ u=length(para) threshold=c(0,para[1], sum(para[1:2])) gamma=c(para[3],sum(para[3:4]), sum(para[3:5])) sigma=c(1,para[6:7]) beta=c(para[8:u]) locmacro=which(z[,12]==(k-1))#macroeconomic variables macroeco=z[locmacro,] transition = vector("list", 12) shannon1=0 mj1=0 vj1=0 for (kk in 1:12){ locsurv=which(x[[kk]][,18]==(k-1)) dkl=length(locsurv) if (dkl==0){ next } covariates=x[[kk]][locsurv,] linearmat=firm_cov(covariates,macroeco,beta) states=x[[kk]][1,2] pred_prob=numeric() for (jj in 1:4){ if(jj < 4){ ESkl=1/sigma[states]*(threshold[jj]-gamma[states] - linearmat[[2]]-serials[k-28]) } if (jj > 1){ ESkl1=1/sigma[states]*(threshold[jj-1]-gamma[states] - linearmat[[2]]-serials[k-28]) } if (jj == 4){ phi_kl= 1-Gcdf(ESkl1) }else{ if (jj == 1){ phi_kl= Gcdf(ESkl) }else{ phi_kl=Gcdf(ESkl)-Gcdf(ESkl1) } } pred_prob=cbind(pred_prob,phi_kl) } nextstates=x[[kk]][locsurv,3] # vector of next states at k transition[[kk]]=cbind(pred_prob,nextstates) #shannon's entrophy for (w in 1:dkl){ if (nextstates[w]==1){ shannon1=shannon1+log(pred_prob[w,1]) } if (nextstates[w]==2){ shannon1=shannon1+log(pred_prob[w,2]) } if (nextstates[w]==3){ shannon1=shannon1+log(pred_prob[w,3]) } if (nextstates[w]==4){ shannon1=shannon1+log(pred_prob[w,4]) } mij1=pred_prob[w,1]*log(pred_prob[w,1])+pred_prob[w,2]*log(pred_prob[w,2])+pred_prob[w,3]*log(pred_prob[w,3])+pred_prob[w,4]*log(pred_prob[w,4]) vij1=pred_prob[w,1]*log(pred_prob[w,1])^2+pred_prob[w,2]*log(pred_prob[w,2])^2+pred_prob[w,3]*log(pred_prob[w,3])^2+pred_prob[w,4]*log(pred_prob[w,4])^2-mij1^2 mj1=mj1+sum(mij1) vj1=vj1+sum(vij1) } } zscore=(shannon1-mj1)/sqrt(vj1) list(transition=transition, zscore=zscore) } mle=out.frailty$regress_est serials=out.frailty$frailty_est entropy=numeric() for (tt in 29:112){ results=rating_frailty_insample(x=byratings,z=macroecono,para=mle,serials=serials,tt) entropy=c(entropy,results$zscore) }