描述
老大要求做一个违规词检测功能,头都大了,不想说话了,直接上我写的代码
具体的合格要求见代码里的注释。简单来说:如果违禁词本身包含空格,原句中对应位置也必须有空格才算匹配;匹配时忽略大小写。
代码1.
/**
 * Demo of a forbidden-word detector that splits a sentence into plain and forbidden segments.
 *
 * <p>Matching ignores case. A hit only counts when it is delimited by spaces on both sides:
 * each edge of the hit must either touch the start/end of the sentence, be a space that is
 * part of the forbidden word itself, or sit next to a space in the sentence.
 */
public class Test1 {

    public static void main(String[] args) {
        /*
         * Acceptance examples (sentence, forbidden word, expected result):
         * 1. "奥 术大师 大所 大付 aaa 炸药"      "炸药"   true
         * 2. "奥 术大师 大所 大付 aaa炸药"       "炸药"   false
         * 3. "奥 术大师 大所 大付 aaa炸药 "      "炸药 "  false
         * 4. "奥 术大师 大所 大付 aaa 炸药 "     " 炸药"  true
         * 5. "aaaa Zy, zzcccccccccccccc"         " zy,"   true
         * 6. "aaaa Zy,z zcccccccccccccc"         " zy,"   false
         */
        String a = "奥 术大师 大所 大付 aaa炸药";
        String b = "奥";
        Text text = subStrInfo(a, b);
        List<Text.Word> words = text.getText();
        for (Text.Word word : words) {
            boolean isForbiddenWord = word.isForbiddenWord();
            String s = word.getWord();
            System.out.println("s:" + s + " is:" + isForbiddenWord);
        }
    }

    /**
     * Collapses every run of consecutive spaces/tabs into the first character of that run.
     *
     * @param original the text to normalize; must not be {@code null}
     * @return the text with each blank run reduced to a single blank character
     */
    public static String repaceWhiteSapce(String original) {
        StringBuilder sb = new StringBuilder(original.length());
        boolean previousWasBlank = false;
        for (int i = 0; i < original.length(); i++) {
            char c = original.charAt(i);
            boolean blank = c == ' ' || c == '\t';
            // Only the first blank of a run is kept; the rest are dropped.
            if (!blank || !previousWasBlank) {
                sb.append(c);
            }
            previousWasBlank = blank;
        }
        return sb.toString();
    }

    /**
     * Splits {@code source} into consecutive segments and flags the ones that are occurrences
     * of the forbidden word.
     *
     * <p>The segments, concatenated in order, reproduce {@code source} exactly. Occurrences that
     * are not space-delimited stay inside the surrounding plain segment. Forbidden segments keep
     * the casing found in {@code source}.
     *
     * @param source          the sentence to scan; {@code null} or empty yields an empty segment list
     * @param violateABanWord the forbidden word, matched case-insensitively; when {@code null} or
     *                        empty nothing is flagged and the sentence is returned as one segment
     * @return a {@link Text} whose segment list is never {@code null}
     */
    public static Text subStrInfo(String source, String violateABanWord) {
        Text text = new Text();
        List<Text.Word> wordList = new ArrayList<>();
        text.setText(wordList);

        if (source == null || source.isEmpty()) {
            return text;
        }
        if (violateABanWord == null || violateABanWord.isEmpty()) {
            // An empty pattern would "match" everywhere; treat it as no forbidden word at all.
            wordList.add(newWord(source, false));
            return text;
        }

        int wordLength = violateABanWord.length();
        int lastCandidate = source.length() - wordLength;
        int plainStart = 0; // start of the not-yet-emitted plain text
        int i = 0;
        while (i <= lastCandidate) {
            // regionMatches compares char by char, so indices always refer to the original string
            // (lower-casing the whole string first may change its length for some characters).
            boolean hit = source.regionMatches(true, i, violateABanWord, 0, wordLength)
                    && isSpaceDelimited(source, violateABanWord, i);
            if (hit) {
                if (i > plainStart) {
                    wordList.add(newWord(source.substring(plainStart, i), false));
                }
                wordList.add(newWord(source.substring(i, i + wordLength), true));
                i += wordLength;
                plainStart = i;
            } else {
                // Step by one so that an overlapping candidate is still examined.
                i++;
            }
        }
        if (plainStart < source.length()) {
            wordList.add(newWord(source.substring(plainStart), false));
        }
        return text;
    }

    /**
     * Tells whether the occurrence of {@code bannedWord} starting at {@code start} is delimited
     * by spaces (or the sentence boundaries) on both sides.
     */
    private static boolean isSpaceDelimited(String source, String bannedWord, int start) {
        int end = start + bannedWord.length();
        boolean leftOk = start == 0
                || bannedWord.charAt(0) == ' '
                || source.charAt(start - 1) == ' ';
        boolean rightOk = end == source.length()
                || bannedWord.charAt(bannedWord.length() - 1) == ' '
                || source.charAt(end) == ' ';
        return leftOk && rightOk;
    }

    /** Builds a segment with the given content and forbidden flag. */
    private static Text.Word newWord(String content, boolean forbidden) {
        Text.Word word = new Text.Word();
        word.setWord(content);
        word.setForbiddenWord(forbidden);
        return word;
    }
}
代码2
/**
 * A body of text represented as an ordered list of segments.
 */
public class Text {

    /** Segments of the body text; {@code null} until {@link #setText(List)} is called. */
    private List<Word> text;

    /** Replaces the segment list with the given one (stored as-is, not copied). */
    public void setText(List<Word> text) {
        this.text = text;
    }

    /** Returns the segment list exactly as it was last set. */
    public List<Word> getText() {
        return this.text;
    }

    /**
     * One segment of the body text, together with a flag telling whether it is a forbidden word.
     */
    public static class Word {

        /** Content of this segment. */
        private String word;

        /** Whether this segment is a forbidden word; {@code false} unless set otherwise. */
        private boolean forbidden;

        public void setWord(String word) {
            this.word = word;
        }

        public String getWord() {
            return this.word;
        }

        public void setForbiddenWord(boolean forbiddenWord) {
            this.forbidden = forbiddenWord;
        }

        public boolean isForbiddenWord() {
            return this.forbidden;
        }
    }
}
...
人已经麻了,大脑正在颤抖。不过这个功能自己研究出来之后,对字符串操作的理解会非常深刻,
确实非常锻炼自己的编程思维能力,但是讲真也非常累,代码可能有BUG存在,不想管了,暂时我就这样了,这种代码真的只有自己才能看懂....做个记录

京公网安备 11010502036488号