# The codes are for corporate credit rating migration under orded logit model with frailty. 4 categories. 

# Data sets.
# Note that GVKEY = company ids,  tindex =1 means the 1st quarter of 1979.


#variables=read.table("/home/tbae/voldata/rating_com_cov.txt",header=TRUE)
#variables=variables[-2]
#macroecono=read.table("/home/tbae/voldata/macroeco.txt",header=TRUE)

variables=read.table("K:/Merged/rating_com_cov.txt",header=TRUE)
variables=variables[-2]
macroecono=read.table("K:/Merged/macroeco.txt",header=TRUE)

variables[,16]=100*variables[,16]


loc=which(variables[,2]==0 | variables[,3]==0)
variables=variables[-loc,]


loc11=which(variables[,2]==8) # state 8 is an abosrbing state
variables=variables[-loc11,]



locdelmac=which(macroecono[,12] <= 27 | macroecono[,12] >= 112)
macroecono=macroecono[-locdelmac,]


locdel=which(variables[,18] <= 27 | variables[,18] >= 112)
variables=variables[-locdel,]

#trunctation 1percentile and 99percentile
for (j in 5:17){
               loc1=which(variables[,j] <= quantile(variables[,j],0.01))
               loc2=which(variables[,j] >= quantile(variables[,j],0.99))
               variables[loc1,j]= quantile(variables[,j],0.01)
               variables[loc2,j]= quantile(variables[,j],0.99)
}




loc12=which(variables[,2] <= 3)
variables[loc12,2]=1

loc13=which((variables[,2] > 3) & (variables[,2] <= 6))
variables[loc13,2]=2

loc14=which(variables[,2] == 7)
variables[loc14,2]=3

loc22=which(variables[,3] <= 3)
variables[loc22,3]=1

loc23=which((variables[,3] > 3) & (variables[,3] <= 6))
variables[loc23,3]=2

loc24=which(variables[,3] == 7)
variables[loc24,3]=3

loc25=which(variables[,3] == 8)
variables[loc25,3]=4


#for sharcnet
#variables=read.table("/home/tbae/voldata/ratingcov.txt",header=TRUE)
#variables=variables[-2]
#macroecono=read.table("/home/tbae/voldata/macroeco.txt",header=TRUE)



byratings=by(variables,variables[,c(3,2)],function(y){y})


#we consider rating data available from December 1985. That is, qtr=28 
#rating==0 means no rating. consider rating 1 to 8.

col_sums=function(matx){
        if (is.vector(matx)){
           matx=t(matrix(matx))
        }
        
        if (dim(matx)[1]==0){
           colsums=0
        }else{
        
            if (dim(matx)[1]==1){
                colsums=matx
            }else{
                colsums=colSums(matx)
            }
        }
        colsums
}    

Gcdf=function(x){
        cumul=rep(0,length(x))
        loc1=which(x > 500)
        loc2=which(x <= 500)
        cumul[loc1]=1
        cumul[loc2]=exp(x[loc2])/(1+exp(x[loc2]))
        cumul
        exp(x)/(1+exp(x))
}

Gprime=function(y){
        Gcdf(y)*(1-Gcdf(y))
}

Gtwoprime=function(y){
        Gcdf(y)*(1-Gcdf(y))*(1-2*Gcdf(y))
}
        
    

        
loglike_frailty=function(x,z,para,serials,arpara,tind){
          #c=(c1,c2,c3,...,c7) gamma=c(gamma1,...,gamma7) for identifiability set c1=0, and sigma1=1 
          u=length(para)
          threshold=c(0,para[1], sum(para[1:2]))
          gamma=c(para[3],sum(para[3:4]), sum(para[3:5]))       
          sigma=c(1,para[6:7])          
          beta=c(para[8:u])
          
          alpha=arpara[1]
          mu = arpara[2] 
          smooth =arpara[3]
          
          u1=length(threshold)
          u2=length(gamma)
          u3=length(sigma)
          u4=length(beta)         
                    
          loglikelihood=0  
            
     #regression parameter estiamtion

            for (tt in 29:tind){
           
                locmacro=which(z[,12]==(tt-1)) #macroeconomic variables
                macroeco=z[locmacro,]
            
                for (kk in 1:12){
                     locsurv=which(x[[kk]][,18]==(tt-1))
                     dkl=length(locsurv)
 
                     if (dkl==0){
                         next
                     }
                 
                    covariates=x[[kk]][locsurv,]
                    states=x[[kk]][1,2]
                    nextstates=x[[kk]][1,3] # moved transitions at tt
                 
                    linearmat=firm_cov(covariates,macroeco,beta)
                    
                    
                    if(nextstates < 4){
                                     ESkl=1/sigma[states]*(threshold[nextstates]-gamma[states] - linearmat[[2]]-serials[tt-28])
                                     
                    }
                    
                    if  (nextstates > 1){
                                     ESkl1=1/sigma[states]*(threshold[nextstates-1]-gamma[states] - linearmat[[2]]-serials[tt-28])
                                     
                    }
                    
                    if (nextstates == 4){
                                        phi_kl= 1-Gcdf(ESkl1)
                                        ithloglike=-log(1+exp(ESkl1))
                                        }else{
                                              if (nextstates == 1){
                                                                  phi_kl= Gcdf(ESkl)
                                                                  ithloglike=ESkl - log(1+exp(ESkl))
                                                                  }else{
                                                                        phi_kl=Gcdf(ESkl)-Gcdf(ESkl1)
                                                                        ithloglike=log(exp(ESkl)-exp(ESkl1)) -log(1+exp(ESkl))-log(1+exp(ESkl1))
                                                                        }
                                              }

                 
                    loglikelihood=loglikelihood+sum(ithloglike)
               }
               
               if (tt == 29){
                            penalty = 0.5*log(2*pi)+log(smooth)+0.5*(serials[tt-28]-mu)^2/smooth^2
               }else{
                    penalty = 0.5*log(2*pi)+log(smooth)+0.5*(serials[tt-28]-serials[tt-29]-alpha*(mu-serials[tt-29]))^2/smooth^2
              }
              
              loglikelihood = loglikelihood - penalty 
       }
        print(loglikelihood)
       -loglikelihood
}



  



serialdepend=function(x,z,para,arpara,ini_frail,tind){
          #c=(c1,c2,c3,...,c7) gamma=c(gamma1,...,gamma7) for identifiability set c1=0, and sigma1=1 
          u=length(para)
          threshold=c(0,para[1], sum(para[1:2]))
          gamma=c(para[3],sum(para[3:4]), sum(para[3:5]))       
          sigma=c(1,para[6:7])          
          beta=c(para[8:u])
          
          alpha=arpara[1]
          mu = arpara[2] 
          smooth =arpara[3]
           
          u1=length(threshold)
          u2=length(gamma)
          u3=length(sigma)
          u4=length(beta)         
          
          nperiod=tind-28
          
          serials = ini_frail
          dist = 1 
           
        while (dist >= 1.0e-3){
          scorez = rep(0,nperiod)
          fisherz =  matrix(0,nperiod,nperiod)
  
          
          for (tt in 29:tind){
           
                locmacro=which(z[,12]==(tt-1)) #macroeconomic variables
                macroeco=z[locmacro,]
                pointprime = 0
                pointdoubleprime = 0

                for (kk in 1:12){
                     locsurv=which(x[[kk]][,18]==(tt-1))
                     dkl=length(locsurv)
 
                     if (dkl==0){
                         next
                     }
                 
                    covariates=x[[kk]][locsurv,]
                    states=x[[kk]][1,2]
                    nextstates=x[[kk]][1,3] # moved transitions at tt
                 
                    linearmat=firm_cov(covariates,macroeco,beta)
                    
                    
                    if(nextstates < 4){
                                     ESkl=1/sigma[states]*(threshold[nextstates]-gamma[states] - linearmat[[2]]-serials[tt-28])
                                     
                    }
                    
                    if  (nextstates > 1){
                                     ESkl1=1/sigma[states]*(threshold[nextstates-1]-gamma[states] - linearmat[[2]]-serials[tt-28])
                                     
                    }
                    
                    if (nextstates == 4){
                                        pointprime= pointprime + sum(Gcdf(ESkl1)/sigma[states])
                                        pointdoubleprime= pointdoubleprime + sum(((Gcdf(ESkl1)-1)*Gcdf(ESkl1))/sigma[states]^2)
                                        }else{
                                              if (nextstates == 1){
                                                                  pointprime= pointprime + sum((Gcdf(ESkl)-1)/sigma[states])
                                                                  pointdoubleprime= pointdoubleprime + sum(((Gcdf(ESkl)-1)*Gcdf(ESkl))/sigma[states]^2)
                                                                  }else{
                                                                        pointprime= pointprime + sum((Gcdf(ESkl)+Gcdf(ESkl1)-1)/sigma[states])
                                                                        pointdoubleprime= pointdoubleprime + sum(((Gcdf(ESkl)-1)*Gcdf(ESkl)+(Gcdf(ESkl1)-1)*Gcdf(ESkl1))/sigma[states]^2)
                                        
                                                                  }
                                        }
                   
                 }
                
               if ( tt==29){
                           scorez[tt-28]=pointprime -1/smooth^2*(serials[tt-28]-mu+(alpha-1)*(serials[tt-27]-serials[tt-28]-alpha*(mu-serials[tt-28])))
                           fisherz[tt-28,tt-28]=pointdoubleprime-1/smooth^2*(1+(alpha-1)^2)
                           fisherz[tt-28,tt-27]=-1/smooth^2*(alpha-1)
                           fisherz[tt-27,tt-28]=-1/smooth^2*(alpha-1)
               }

               if ( tt==tind){
                           scorez[tt-28]=pointprime -1/smooth^2*(serials[tt-28]-serials[tt-29]-alpha*(mu-serials[tt-29]))
                           fisherz[tt-28,tt-28]=pointdoubleprime-1/smooth^2
               }

               if ( (tt > 29) & (tt < tind) ){
                           scorez[tt-28]=pointprime -1/smooth^2*((serials[tt-28]-serials[tt-29]-alpha*(mu-serials[tt-29]))+(alpha-1)*(serials[tt-27]-serials[tt-28]-alpha*(mu-serials[tt-28])))
                           fisherz[tt-28,tt-28]=pointdoubleprime-1/smooth^2*(1+(alpha-1)^2)
                           fisherz[tt-28,tt-27]=-1/smooth^2*(alpha-1)
                           fisherz[tt-27,tt-28]=-1/smooth^2*(alpha-1)
               }             
               
         }
         
          newserials=matrix(serials)-qr.solve(fisherz)%*%matrix(scorez)
          dist=sqrt(sum((newserials-matrix(serials))^2))
          print(dist)
          serials =newserials          
       }

        list(serials=serials, score=scorez, fisher=-fisherz)
}


arparas=function(alphamu,serials,tind){
        nbase=tind-28
        alpha=alphamu[1]
        mu=alphamu[2]
        
        firstterm=(serials[1]-mu)^2
        secondterm=0
        for (i in 2:nbase){
           secondterm=secondterm+(serials[i]-serials[i-1]-alpha*(mu-serials[i-1]))^2
        }
        
        firstterm+secondterm
}




itr_rating_frailty=function(x,z,para,arpara,tind){
          u=length(para)
          threshold=c(0,para[1], sum(para[1:2]))
          gamma=c(para[3],sum(para[3:4]), sum(para[3:5]))       
          sigma=c(1,para[6:7])          
          beta=c(para[8:u])
          
          u1=length(threshold)
          u2=length(gamma)
          u3=length(sigma)
          u4=length(beta)     

      
           nbase=tind-28
           ini_reg=para
           ini_arpara=arpara
           ini_frail=rep(ini_arpara[2],nbase)
           
           howfar=1
           while(howfar >= 1.0e-4){
                
                out.frailties=serialdepend(x,z,ini_reg,ini_arpara,ini_frail,tind)
                frail_est=out.frailties$serials
                print(frail_est)
                
                arpara_est=c(optim(par=ini_arpara[1:2],arparas,serials=frail_est,tind=tind)[[1]],ini_arpara[3])
                print(arpara_est)
                
                out.regress=optim(par=ini_reg,fn=loglike_frailty, method="L-BFGS-B", control=list(trace=6,maxit=500),low=c(rep(0.01,2),-Inf,rep(0.01,2),rep(0.01,2),rep(-Inf,u4)), x=x,z=z,serials=frail_est, arpara=arpara_est, tind=tind)
                
                if (out.regress$convergence > 0){
                                                print("optimization failed to converge")
                                                break
                }
                 
                regpara_est=out.regress$par
                print(regpara_est)
                
                howfar=sqrt(sum((regpara_est-ini_reg)^2))
                print(howfar)
                ini_reg=regpara_est
                ini_frail=frail_est
                ini_arpara=arpara_est
           }
           
           list(frail_fisher_diag=diag(out.frailties$fisher),reg_loglikelihood=out.regress$value,frailty_est=frail_est,regress_est=regpara_est,arpara_est=arpara_est)
}

#sigma = 0.1

firm_cov=function(x,z,newregress){
            dj=dim(x)[1]
            v11=x[,5]#QR 2
            v12=x[,6] #CR 3
            v13=x[,7] #CL 4 
            v14=x[,8] #IBIT 5
            v15=x[,9] #TL 6
            v16=x[,10] #SALE 7
            v17=x[,11] #WC 8
            v18=x[,12] #NI 9
            v19=x[,13] #RE 10
            v20=x[,14] #DTD 11 
            v21=x[,15] #AGE 12 
            v22=x[,16] #RTN 13
            v23=x[,17] #STD 14
            v24=(x[,4]==1)#ind1 15
            v25=(x[,4]==2)#ind2 16
            v26=(x[,4]==3) #ind3 17
            v27=(x[,4]==4) #ind4 18
                                 
            v28=rep(z[,1],dj) #indexrtn 19
            v29=rep(z[,2],dj) #30yrbond 20
            v30=rep(z[,3],dj) #10yrnote 21
            v31=rep(z[,4],dj) #5yrnote 22
            v32=rep(z[,5],dj) #1yrnote 23
            v33=rep(z[,6],dj) #90daybill 24
            v34=rep(z[,7],dj) #30daybill 25
            v35=rep(z[,8],dj) #CPI 26
            v36=rep(z[,9],dj) #ex inx 27
            v37=rep(z[,10],dj) #unemp 28
            v38=rep(z[,11],dj) #gdpgrt 29
            
	    liquid = -0.13*v12+1.56*v17
            profit = v14
            debt = -1.47*v13+0.37*v15
            RTN = v22
            DTD = v20
            STD = v23 
            IND2 = v25
            IND3 = v26
            B90 = v33
            CPI =v35

            mat=cbind(liquid,profit,debt,RTN,DTD,STD,IND2,IND3,CPI)
           
             if (dj==1){
                        mat=as.matrix(mat)
             }
             
            linear=rowSums(t(matrix(rep(newregress,dj),ncol=dj))*mat)
            list(mat,linear)
}
  

starting=c(13.6195,6.8093,-1.0209,9.9141,7.4340,1.0873,0.7107,+ 	
           -0.9905,-6.2286,0.9624,-0.4531,-0.4501,23.4936,-0.2747,0.4417,-0.1159)


arpara_start=c(0.0019,-0.4287,0.1000)


#mle=out.frailty$regress_est
#arpara=out.frailty$arpara_est

 
out.frailty=itr_rating_frailty(x=byratings,z=macroecono,para=starting, arpara=arpara_start,tind=112)                       
out.frailty






# sigma >= 0.5
firm_cov=function(x,z,newregress){
            dj=dim(x)[1]
            v11=x[,5]#QR 2
            v12=x[,6] #CR 3
            v13=x[,7] #CL 4 
            v14=x[,8] #IBIT 5
            v15=x[,9] #TL 6
            v16=x[,10] #SALE 7
            v17=x[,11] #WC 8
            v18=x[,12] #NI 9
            v19=x[,13] #RE 10
            v20=x[,14] #DTD 11 
            v21=x[,15] #AGE 12 
            v22=x[,16] #RTN 13
            v23=x[,17] #STD 14
            v24=(x[,4]==1)#ind1 15
            v25=(x[,4]==2)#ind2 16
            v26=(x[,4]==3) #ind3 17
            v27=(x[,4]==4) #ind4 18
                                 
            v28=rep(z[,1],dj) #indexrtn 19
            v29=rep(z[,2],dj) #30yrbond 20
            v30=rep(z[,3],dj) #10yrnote 21
            v31=rep(z[,4],dj) #5yrnote 22
            v32=rep(z[,5],dj) #1yrnote 23
            v33=rep(z[,6],dj) #90daybill 24
            v34=rep(z[,7],dj) #30daybill 25
            v35=rep(z[,8],dj) #CPI 26
            v36=rep(z[,9],dj) #ex inx 27
            v37=rep(z[,10],dj) #unemp 28
            v38=rep(z[,11],dj) #gdpgrt 29
            
	    liquid = -0.13*v12+1.56*v17
            profit = v14
            debt = -1.47*v13+0.37*v15
            RTN=v22
            DTD = v20
            STD = v23 
            IND2 = v25
            IND3 = v26
            B90 = v33
            CPI =v35

            mat=cbind(liquid,profit,debt,RTN,DTD,STD,IND2,IND3)
           
             if (dj==1){
                        mat=as.matrix(mat)
             }
             
            linear=rowSums(t(matrix(rep(newregress,dj),ncol=dj))*mat)
            list(mat,linear)
}
  

starting=c(13.3253,6.7003,-1.0935,9.8002,7.2956,1.0579,0.6934,+ 
      -0.9668,-6.0242,0.9526,-0.4583,-0.4440,23.0607,-0.2717,0.4214) 

arpara_start=c(0.2209,-1.0258,0.5) 


starting=c(13.7914,6.9500,-1.0798,10.0302,7.5728,1.0942,0.7158,+ 
      -0.9897,-6.1837,0.9871,-0.4846,-0.4531,24.1043,-0.2819,0.4199) 

arpara_start=c(0.4459,-1.1257,1.0) 


starting=c(14.1059,7.1209,-1.0645,10.1875,7.7623,1.1189,0.7327,+ 
      -1.0071,-6.3032,1.0096,-0.5037,-0.4600,24.6628,-0.2902,0.4166) 

arpara_start=c(0.5381,-1.1399,5.0) 


starting=c(14.1059,7.1209,-1.0645,10.1875,7.7623,1.1189,0.7327,+ 
      -1.0071,-6.3032,1.0096,-0.5037,-0.4600,24.6628,-0.2902,0.4166) 

arpara_start=c(0.5381,-1.1399,10.0) 





starting=c(14.1446,7.1428,-1.0598,10.2074,7.7844,1.1220,0.7351,+ 
      -1.0128,-6.3775,1.0110,-0.5048,-0.4609,24.6620,-0.2918,0.4168) 

arpara_start=c(0.5416,-1.1402,20.0) 


  
out.frailty=itr_rating_frailty(x=byratings,z=macroecono,para=starting, arpara=arpara_start,tind=112)                       
out.frailty






# in sample prediction tests



rating_frailty_insample=function(x,z,para,serials,k){
              
          u=length(para) 
          threshold=c(0,para[1], sum(para[1:2]))
          gamma=c(para[3],sum(para[3:4]), sum(para[3:5]))       
          sigma=c(1,para[6:7])          
          beta=c(para[8:u])

      
          locmacro=which(z[,12]==(k-1))#macroeconomic variables
          macroeco=z[locmacro,]

          transition = vector("list", 12) 
          shannon1=0
          mj1=0
          vj1=0
                       
          
          for (kk in 1:12){
                     locsurv=which(x[[kk]][,18]==(k-1))
                     dkl=length(locsurv)
 
                     if (dkl==0){
                         next
                     }
                 
                    covariates=x[[kk]][locsurv,]
                    
                    linearmat=firm_cov(covariates,macroeco,beta) 
                    states=x[[kk]][1,2]
                    
                    pred_prob=numeric()

                    for (jj in 1:4){
                                   if(jj < 4){
                                     ESkl=1/sigma[states]*(threshold[jj]-gamma[states] - linearmat[[2]]-serials[k-28])
                                   }
                    
                                   if  (jj > 1){
                                     ESkl1=1/sigma[states]*(threshold[jj-1]-gamma[states] - linearmat[[2]]-serials[k-28])
                                   }  
                               
                                   
                                   if (jj == 4){
                                        phi_kl= 1-Gcdf(ESkl1)
                                        }else{
                                              if (jj == 1){
                                                          phi_kl= Gcdf(ESkl)
                                                          }else{
                                                                phi_kl=Gcdf(ESkl)-Gcdf(ESkl1)
                                                                }
                                        }
                                    pred_prob=cbind(pred_prob,phi_kl)
                                    
                                   
                    }

                    nextstates=x[[kk]][locsurv,3] # vector of next states at k
                    transition[[kk]]=cbind(pred_prob,nextstates)
                                    
                    
                    #shannon's entrophy
                      for (w in 1:dkl){
                                  if (nextstates[w]==1){
                                          shannon1=shannon1+log(pred_prob[w,1])
                                  }
                                  if (nextstates[w]==2){
                                          shannon1=shannon1+log(pred_prob[w,2])
                                  }
                                  if (nextstates[w]==3){
                                          shannon1=shannon1+log(pred_prob[w,3])
                                  }
                                  if (nextstates[w]==4){
                                          shannon1=shannon1+log(pred_prob[w,4])
                                  } 
                           

                                  mij1=pred_prob[w,1]*log(pred_prob[w,1])+pred_prob[w,2]*log(pred_prob[w,2])+pred_prob[w,3]*log(pred_prob[w,3])+pred_prob[w,4]*log(pred_prob[w,4])
                                  vij1=pred_prob[w,1]*log(pred_prob[w,1])^2+pred_prob[w,2]*log(pred_prob[w,2])^2+pred_prob[w,3]*log(pred_prob[w,3])^2+pred_prob[w,4]*log(pred_prob[w,4])^2-mij1^2              
                       
                       mj1=mj1+sum(mij1)
                       vj1=vj1+sum(vij1) 
                    }                   
                    
        } 
    
        zscore=(shannon1-mj1)/sqrt(vj1)
         

        list(transition=transition,  zscore=zscore)    
}


mle=out.frailty$regress_est
serials=out.frailty$frailty_est


entropy=numeric()
for (tt in 29:112){

              results=rating_frailty_insample(x=byratings,z=macroecono,para=mle,serials=serials,tt)
              entropy=c(entropy,results$zscore)
}