library('stringr')
# Characters that carry a special meaning inside a regular expression and
# therefore need a leading backslash before being used as a literal pattern.
# The first six entries are the original set (kept in their original order);
# the remaining ones complete the list of common metacharacters so that
# characters such as "." or "?" are also treated literally.
specialChar <- c(
  '(', '\\', ')', '[', '{', '}',
  ']', '.', '*', '+', '?', '^', '$', '|'
)
# Find every position in `second` at which the character located at
# `fisrt[offset]` occurs.
#
# Arguments:
#   fisrt  - string that supplies the character to look for (the parameter
#            name is kept as-is so existing named calls keep working)
#   second - string that is searched
#   offset - 1-based position of the character inside `fisrt`
#
# Returns:
#   An integer vector with the 1-based positions of the character in
#   `second`, or the single value 0 when there is no occurrence (valid
#   positions start at 1, so 0 is an unambiguous "no match" marker).
getCharOffset <- function(fisrt, second, offset) {
  char <- substr(fisrt, offset, offset)
  # An offset outside of `fisrt` yields "" (or NA for NA input); there is no
  # character to look for, so report "no match".
  if (is.na(char) || nchar(char) == 0) {
    return(0)
  }
  # Match the character literally instead of as a regular expression, so that
  # metacharacters such as ".", "?", "*" or "+" cannot match unrelated
  # characters or raise an invalid-pattern error.
  hits <- gregexpr(char, second, fixed = TRUE)[[1]]
  # gregexpr() signals "no match" with -1 (and NA for an NA subject).
  if (is.na(hits[1]) || hits[1] == -1) {
    0
  } else {
    # Drop the match.length attributes and keep only the start positions.
    as.integer(hits)
  }
}
# Score how close the character `first[offset]` is to its nearest occurrence
# in `second`.
#
# Arguments:
#   first  - string that supplies the character
#   second - string in which the character is looked up
#   offset - 1-based position of the character inside `first`
#
# Returns:
#   A value in [0, 1]: 1 when the same character sits at the same position in
#   `second`, decreasing linearly with the distance to the nearest occurrence,
#   and 0 when the character does not occur in `second` at all or `second`
#   is empty.
getDistance <- function(first, second, offset) {
  second_length <- nchar(second)
  # Nothing can match inside an empty string; this also avoids dividing by 0.
  if (second_length == 0) {
    return(0)
  }
  index <- getCharOffset(first, second, offset)
  # getCharOffset() returns the single value 0 when the character is absent.
  # Real positions are >= 1, so this must not be treated as a position
  # (doing so would give absent characters a positive score).
  if (length(index) == 1 && index[1] == 0) {
    return(0)
  }
  # Distance to the nearest occurrence, capped at the length of `second`.
  nearest <- min(abs(index - offset))
  distance <- min(nearest, second_length)
  (second_length - distance) / second_length
}
# Compute the directional similarity of `first` towards `second`.
#
# Every character position of `first` is scored with getDistance() and the
# scores are averaged over the length of `first`.
#
# Arguments:
#   first  - string whose characters are scored
#   second - string the characters are compared against
#
# Returns:
#   The mean per-character score, or 0 when either string is empty.
calculateSimilarity <- function(first, second) {
  first_length <- nchar(first)
  # An empty string has no characters to score (and would otherwise lead to a
  # division by zero), so define the similarity as 0 in that case.
  if (first_length == 0 || nchar(second) == 0) {
    return(0)
  }
  # seq_len() is safe for any length, unlike 1:n which counts down for n = 0.
  scores <- vapply(
    seq_len(first_length),
    function(i) getDistance(first, second, i),
    numeric(1)
  )
  sum(scores) / first_length
}
# Return the symmetric similarity of two strings: the average of the
# directional similarity computed from `first` to `second` and from `second`
# to `first`.
getSimilarity <- function(first, second) {
  forward <- calculateSimilarity(first, second)
  backward <- calculateSimilarity(second, first)
  (forward + backward) / 2
}
|