# The codes are for cross validation of corporate credit rating migration under basic orded logit model with 3 categories. # Data sets. # Note that GVKEY = company ids, tindex =1 means the 1st quarter of 1979. #variables=read.table("/home/tbae/voldata/rating_com_cov.txt",header=TRUE) #variables=variables[-2] #macroecono=read.table("/home/tbae/voldata/macroeco.txt",header=TRUE) variables=read.table("F:/Merged/rating_com_cov.txt",header=TRUE) variables=variables[-2] macroecono=read.table("F:/Merged/macroeco.txt",header=TRUE) variables[,16]=100*variables[,16] loc=which(variables[,2]==0 | variables[,3]==0) variables=variables[-loc,] loc11=which(variables[,2]==8) # state 8 is an abosrbing state variables=variables[-loc11,] locdelmac=which(macroecono[,12] <= 27 | macroecono[,12] >= 112) macroecono=macroecono[-locdelmac,] locdel=which(variables[,18] <= 27 | variables[,18] >= 112) variables=variables[-locdel,] #truncation 1percentile and 99percentile for (j in 5:17){ loc1=which(variables[,j] <= quantile(variables[,j],0.01)) loc2=which(variables[,j] >= quantile(variables[,j],0.99)) variables[loc1,j]= quantile(variables[,j],0.01) variables[loc2,j]= quantile(variables[,j],0.99) } loc12=which(variables[,2] <= 3) variables[loc12,2]=1 loc13=which((variables[,2] > 3) & (variables[,2] <= 6)) variables[loc13,2]=2 loc14=which(variables[,2] == 7) variables[loc14,2]=3 loc22=which(variables[,3] <= 3) variables[loc22,3]=1 loc23=which((variables[,3] > 3) & (variables[,3] <= 6)) variables[loc23,3]=2 loc24=which(variables[,3] == 7) variables[loc24,3]=3 loc25=which(variables[,3] == 8) variables[loc25,3]=4 #for sharcnet #variables=read.table("/home/tbae/voldata/ratingcov.txt",header=TRUE) #variables=variables[-2] #macroecono=read.table("/home/tbae/voldata/macroeco.txt",header=TRUE) #Random Sampling of 1300 firms ids=variables[,1] gvkeys=as.numeric(levels(factor(ids))) sampled = sample(gvkeys,1300) sampledloc=numeric() for (i in sampled){ sampledloc=c(sampledloc,which(ids == i)) } variables_est=variables[sampledloc,] # to be used for estimation variables_pred=variables[-sampledloc,] # to be used for prediction byratings_est=by(variables_est,variables_est[,c(3,2)],function(y){y}) byratings_pred=by(variables_pred,variables_pred[,c(3,2)],function(y){y}) #we consider rating data available from December 1985. That is, qtr=28 #rating==0 means no rating. consider rating 1 to 8. row_sums=function(matx){ if (is.vector(matx)){ rowsums=sum(matx) }else{ if (dim(matx)[1]==0){ rowsums=0 }else{ if (dim(matx)[1]==1){ rowsums=sum(matx) }else{ rowsums=rowSums(matx) } } } rowsums } Gcdf=function(x){ cumul=rep(0,length(x)) loc1=which(x > 500) loc2=which(x <= 500) cumul[loc1]=1 cumul[loc2]=exp(x[loc2])/(1+exp(x[loc2])) cumul exp(x)/(1+exp(x)) } Gprime=function(y){ Gcdf(y)*(1-Gcdf(y)) } Gtwoprime=function(y){ Gcdf(y)*(1-Gcdf(y))*(1-2*Gcdf(y)) } loglikelihood=function(x,z,para,tind){ #c=(c1,c2,c3,...,c7) gamma=c(gamma1,...,gamma7) for identifiability set c1=0, and sigma1=1 u=length(para) threshold=c(0,para[1], sum(para[1:2])) gamma=c(para[3],sum(para[3:4]), sum(para[3:5])) sigma=c(1,para[6:7]) beta=c(para[8:u]) u1=length(threshold) u2=length(gamma) u3=length(sigma) u4=length(beta) loglikelihood=0 #regression parameter estiamtion for (tt in 29:tind){ locmacro=which(z[,12]==(tt-1)) #macroeconomic variables macroeco=z[locmacro,] for (kk in 1:12){ locsurv=which(x[[kk]][,18]==(tt-1)) dkl=length(locsurv) if (dkl==0){ next } covariates=x[[kk]][locsurv,] states=x[[kk]][1,2] nextstates=x[[kk]][1,3] # moved transitions at tt linearmat=const_covariates(covariates,macroeco,beta) if(nextstates < 4){ ESkl=1/sigma[states]*(threshold[nextstates]-gamma[states] - linearmat[[2]]) } if (nextstates > 1){ ESkl1=1/sigma[states]*(threshold[nextstates-1]-gamma[states] - linearmat[[2]]) } if (nextstates == 4){ phi_kl= 1-Gcdf(ESkl1) ithloglike=-log(1+exp(ESkl1)) }else{ if (nextstates == 1){ phi_kl= Gcdf(ESkl) ithloglike=ESkl - log(1+exp(ESkl)) }else{ phi_kl=Gcdf(ESkl)-Gcdf(ESkl1) ithloglike=log(exp(ESkl)-exp(ESkl1)) -log(1+exp(ESkl))-log(1+exp(ESkl1)) } } loglikelihood=loglikelihood+sum(ithloglike) } } print(loglikelihood) -loglikelihood } const_covariates=function(x,z,newregress){ dj=dim(x)[1] v11=x[,5]#QR 2 v12=x[,6] #CR 3 v13=x[,7] #CL 4 v14=x[,8] #IBIT 5 v15=x[,9] #TL 6 v16=x[,10] #SALE 7 v17=x[,11] #WC 8 v18=x[,12] #NI 9 v19=x[,13] #RE 10 v20=x[,14] #DTD 11 v21=x[,15] #AGE 12 v22=x[,16] #RTN 13 v23=x[,17] #STD 14 v24=(x[,4]==1)#ind1 15 v25=(x[,4]==2)#ind2 16 v26=(x[,4]==3) #ind3 17 v27=(x[,4]==4) #ind4 18 v28=rep(z[,1],dj) #indexrtn 19 v29=rep(z[,2],dj) #30yrbond 20 v30=rep(z[,3],dj) #10yrnote 21 v31=rep(z[,4],dj) #5yrnote 22 v32=rep(z[,5],dj) #1yrnote 23 v33=rep(z[,6],dj) #90daybill 24 v34=rep(z[,7],dj) #30daybill 25 v35=rep(z[,8],dj) #CPI 26 v36=rep(z[,9],dj) #ex inx 27 v37=rep(z[,10],dj) #unemp 28 v38=rep(z[,11],dj) #gdpgrt 29 liquid = -0.13*v12+1.56*v17 profit = v14 debt = -1.47*v13+0.37*v15 RTN = v22 DTD = v20 STD = v23 IND2 = v25 IND3 = v26 B90 = v33 CPI =v35 mat=cbind(liquid,profit,debt,RTN,DTD,STD,IND2,IND3,B90,CPI) if (dj==1){ mat=as.matrix(mat) } linear=rowSums(t(matrix(rep(newregress,dj),ncol=dj))*mat) list(mat,linear) } starting=c(14.7363,7.5547,-2.0214,10.4773,8.0618,1.1889,0.7890,+ -1.1221,-7.1481,0.8916,-0.4433,-0.4627,27.2009,-0.2516,0.4539,-0.1165,-0.0880) result=optim(par=starting,fn=loglikelihood, method="L-BFGS-B", control=list(trace=6,maxit=500),low=c(rep(0.01,2),-Inf,rep(0.01,2),rep(0.01,2),rep(-Inf,10)), x=byratings_est,z=macroecono,tind=112) result rating_insample=function(x,z,para,k){ u=length(para) threshold=c(0,para[1], sum(para[1:2])) gamma=c(para[3],sum(para[3:4]), sum(para[3:5])) sigma=c(1,para[6:7]) beta=c(para[8:u]) locmacro=which(z[,12]==(k-1))#macroeconomic variables macroeco=z[locmacro,] transition = vector("list", 12) transition.moved = vector("list", 12) shannon=0 shannon.move=0 mj1=0 vj1=0 mj1moved=0 vj1moved=0 for (kk in 1:12){ locsurv=which(x[[kk]][,18]==(k-1)) dkl=length(locsurv) if (dkl==0){ next } covariates=x[[kk]][locsurv,] linearmat=const_covariates(covariates,macroeco,beta) states=x[[kk]][1,2] pred_prob=numeric() for (jj in 1:4){ if(jj < 4){ ESkl=1/sigma[states]*(threshold[jj]-gamma[states] - linearmat[[2]]) } if (jj > 1){ ESkl1=1/sigma[states]*(threshold[jj-1]-gamma[states] - linearmat[[2]]) } if (jj == 4){ phi_kl= 1-Gcdf(ESkl1) }else{ if (jj == 1){ phi_kl= Gcdf(ESkl) }else{ phi_kl=Gcdf(ESkl)-Gcdf(ESkl1) } } pred_prob=cbind(pred_prob,phi_kl) } currentstates=rep(states,dkl) nextstates=x[[kk]][locsurv,3] # vector of next states at k transition[[kk]]=cbind(pred_prob,currentstates,nextstates) moved.trans=cbind(pred_prob[,states],row_sums(pred_prob[,-states]),!(currentstates==nextstates)) transition.moved[[kk]]=moved.trans #shannon's entrophy for (w in 1:dkl){ if (nextstates[w]==1){ shannon=shannon+log(pred_prob[w,1]) } if (nextstates[w]==2){ shannon=shannon+log(pred_prob[w,2]) } if (nextstates[w]==3){ shannon=shannon+log(pred_prob[w,3]) } if (nextstates[w]==4){ shannon=shannon+log(pred_prob[w,4]) } mij1=pred_prob[w,1]*log(pred_prob[w,1])+pred_prob[w,2]*log(pred_prob[w,2])+pred_prob[w,3]*log(pred_prob[w,3])+pred_prob[w,4]*log(pred_prob[w,4]) vij1=pred_prob[w,1]*log(pred_prob[w,1])^2+pred_prob[w,2]*log(pred_prob[w,2])^2+pred_prob[w,3]*log(pred_prob[w,3])^2+pred_prob[w,4]*log(pred_prob[w,4])^2-mij1^2 mj1=mj1+sum(mij1) vj1=vj1+sum(vij1) } #change or not entropy # for (w in 1:dkl){ # if (moved.trans[w,3]){ # shannon.jth=log(moved.trans[w,2]) # }else{ # shannon.jth=log(moved.trans[w,1]) # } # # shannon.move = shannon.move+shannon.jth # # mij1moved=moved.trans[w,1]*log(moved.trans[w,1])+moved.trans[w,2]*log(moved.trans[w,2]) # vij1moved=moved.trans[w,1]*log(moved.trans[w,1])^2+moved.trans[w,2]*log(moved.trans[w,2])^2-mij1moved^2 # mj1moved=mj1moved+sum(mij1moved) # vj1moved=vj1moved+sum(vij1moved) # } } zscore=(shannon-mj1)/sqrt(vj1) #zscore.moved=(shannon.move-mj1moved)/sqrt(vj1moved) list(transition=transition, transition.moved=transition.moved,score=zscore) #list(transition=transition, transition.moved=transition.moved,score=zscore,zscore.moved=zscore.moved) } #mle=starting mle=result$par entropy=numeric() for (tt in 29:112){ results=rating_insample(x=byratings_pred,z=macroecono,para=mle,tt) entropy=c(entropy,results$zscore) }