Function Key_word(Chaine As String, occurrence As Integer, Optional Seuil As Integer = 100) As String
Dim s As Variant, T(), T2(), T3(), T4(), T5(), T6(), Tp(), TOc(), oRegExp As Object, Matches As Object
Dim i As Double, dico As Object, k As Integer, Borne As Byte, l As Integer
'Application.Volatile
Chaine = Trim(LCase(Replace(Chaine, """", "")))
Set oRegExp = CreateObject("vbscript.regexp")
With Sheets("liste")
Set Liste = .Range("A2:A" & .Range("A" & .Rows.Count).End(xlUp).Row)
End With
With oRegExp
.Global = True
.MultiLine = True
'traitement des espaces
.Pattern = "(\s+|" & Chr(10) & "|" & Chr(13) & ")"
If .Test(Chaine) = True Then Chaine = .Replace(Chaine, " ")
'traitement des caractères ponctuation
.Pattern = "[,?!;.:…_()\[\]{}“”«»\\|/§~#`^@]+" ' '’-–—
If .Test(Chaine) = True Then Chaine = .Replace(Chaine, " ")
'traitement des apostrophes
.Pattern = "(^|\s)(c|d|l|n|qu|s)(’|')"
If .Test(Chaine) = True Then Chaine = .Replace(Chaine, " ")
'épurage des espaces en trop "
.Pattern = "(\s){2,}"
If .Test(Chaine) = True Then Chaine = .Replace(Chaine, " ")
End With
Set dico = CreateObject("scripting.dictionary")
s = Split(Trim(Chaine))
For i = LBound(s) To UBound(s)
If Application.CountIf(Liste, s(i)) = 0 Then
dico(s(i)) = dico(s(i)) + 1
End If
Next i
T = dico.keys
T2 = dico.items
ReDim Tp(LBound(T2) To UBound(T2))
For i = LBound(T2) To UBound(T2)
Tp(i) = T2(i) / (UBound(s) + 1)
Next i
For i = LBound(Tp) To UBound(Tp)
If Tp(i) <= Seuil / 100 Then
ReDim Preserve T3(LBound(T2) To k)
ReDim Preserve T4(LBound(T2) To k)
T3(k) = T(i)
T4(k) = Tp(i)
k = k + 1
Else
ReDim Preserve T5(LBound(T2) To l)
ReDim Preserve T6(LBound(T2) To l)
T5(l) = T(i)
T6(l) = Tp(i)
l = l + 1
End If
Next i
Borne = Application.Min(dico.Count, occurrence, UBound(T3))
ReDim TOc(LBound(T3) To Borne)
For i = LBound(T3) To Borne
k = Application.Match(Application.Max(T4), T4, 0) - 1
TOc(i) = T3(k) & " (" & Format(T4(k), "0%") & ")": T4(k) = ""
Next i
Key_word = Join(TOc, vbLf)
End Function