描述

老大要求做一个违规词检测功能,头都大了,不想说话了,直接上我写的代码

具体要求请看代码里的注释,自己研究。简单说:违禁词必须作为独立的词出现(前后是空格,或者是句子的开头/结尾)才算匹配;违禁词本身包含空格时,原句对应位置也必须有空格;匹配时忽略大小写。

代码1.

public class Test1 {
    public static void main(String[] args) {

        /*
         *   Acceptance criteria, illustrated with the examples below
         *   (source text, forbidden word, expected "is forbidden" result):
         * 1.  "奥 术大师 大所 大付 aaa 炸药"      "炸药"    true
         * 2.  "奥 术大师 大所 大付 aaa炸药"       "炸药"    false
         * 3.  "奥 术大师 大所 大付 aaa炸药 "      "炸药 "   false
         * 4.  "奥 术大师 大所 大付 aaa 炸药 "     " 炸药"   true
         * 5.  "aaaa Zy, zzcccccccccccccc"      " zy,"     true
         * 6.  "aaaa Zy,z zcccccccccccccc"      " zy,"     false
         */

        String a = "奥 术大师 大所 大付 aaa炸药";
        String b = "奥";

        Text text = subStrInfo(a, b);
        for (Text.Word word : text.getText()) {
            System.out.println("s:" + word.getWord() + "  is:" + word.isForbiddenWord());
        }
    }

    /**
     * Collapses every run of consecutive spaces and tabs into a single character.
     *
     * <p>The character kept is the first one of the run (so {@code "\t  x"} becomes
     * {@code "\tx"}). All other characters are copied unchanged.
     *
     * @param original the text to normalise; must not be {@code null}
     * @return the text with redundant spaces/tabs removed
     */
    public static String repaceWhiteSapce(String original) {
        StringBuilder sb = new StringBuilder(original.length());
        boolean inBlankRun = false;

        for (int i = 0; i < original.length(); i++) {
            char c = original.charAt(i);
            boolean blank = c == ' ' || c == '\t';
            // Keep every non-blank character, and only the first blank of each run.
            if (!blank || !inBlankRun) {
                sb.append(c);
            }
            inBlankRun = blank;
        }
        return sb.toString();
    }

    /**
     * Splits {@code source} into consecutive segments and flags those that are
     * occurrences of the forbidden word.
     *
     * <p>An occurrence is only flagged when it stands alone, i.e. on each side it is
     * delimited by the start/end of the text or by whitespace. Whitespace that is
     * part of the forbidden word itself (leading or trailing) counts as that
     * delimiter. Comparison ignores case. Matches are taken left to right and do
     * not overlap.
     *
     * <p>The segments are returned in order and, concatenated, reproduce
     * {@code source} exactly; text between flagged occurrences is returned as one
     * unflagged segment.
     *
     * @param source          the text to inspect; {@code null} is treated as empty
     * @param violateABanWord the forbidden word; when {@code null} or empty nothing is flagged
     * @return a {@code Text} whose word list holds the segments of {@code source}
     */
    public static Text subStrInfo(String source, String violateABanWord) {
        Text text = new Text();
        List<Text.Word> wordList = new ArrayList<>();

        if (source == null || source.isEmpty()) {
            text.setText(wordList);
            return text;
        }
        if (violateABanWord == null || violateABanWord.isEmpty()) {
            // An empty pattern matches everywhere; treat it as "nothing is forbidden".
            wordList.add(newWord(source, false));
            text.setText(wordList);
            return text;
        }

        int segmentStart = 0; // first character not yet emitted as a segment
        int searchFrom = 0;   // where the next candidate search begins
        int matchStart;
        while ((matchStart = indexOfIgnoreCase(source, violateABanWord, searchFrom)) >= 0) {
            int matchEnd = matchStart + violateABanWord.length();
            if (isDelimited(source, matchStart, matchEnd)) {
                if (matchStart > segmentStart) {
                    wordList.add(newWord(source.substring(segmentStart, matchStart), false));
                }
                wordList.add(newWord(source.substring(matchStart, matchEnd), true));
                segmentStart = matchEnd;
                searchFrom = matchEnd;
            } else {
                // Embedded in a longer token: not a violation, try the next position.
                searchFrom = matchStart + 1;
            }
        }

        if (segmentStart < source.length()) {
            wordList.add(newWord(source.substring(segmentStart), false));
        }
        text.setText(wordList);
        return text;
    }

    /**
     * Finds the first case-insensitive occurrence of {@code word} in {@code source}
     * at or after {@code fromIndex}, or returns -1 when there is none.
     */
    private static int indexOfIgnoreCase(String source, String word, int fromIndex) {
        int lastStart = source.length() - word.length();
        for (int i = fromIndex; i <= lastStart; i++) {
            // regionMatches compares in place, so indices always refer to the original text.
            if (source.regionMatches(true, i, word, 0, word.length())) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Tells whether the match {@code [matchStart, matchEnd)} is bounded on both
     * sides by whitespace or by the start/end of {@code source}.
     */
    private static boolean isDelimited(String source, int matchStart, int matchEnd) {
        boolean leftOk = matchStart == 0
                || Character.isWhitespace(source.charAt(matchStart))
                || Character.isWhitespace(source.charAt(matchStart - 1));
        boolean rightOk = matchEnd == source.length()
                || Character.isWhitespace(source.charAt(matchEnd - 1))
                || Character.isWhitespace(source.charAt(matchEnd));
        return leftOk && rightOk;
    }

    /** Builds a segment with the given content and forbidden flag. */
    private static Text.Word newWord(String content, boolean forbidden) {
        Text.Word word = new Text.Word();
        word.setWord(content);
        word.setForbiddenWord(forbidden);
        return word;
    }

}

代码2

public class Text {

    /**
     * The body text as an ordered list of segments. Never {@code null}: it starts
     * out empty so callers can iterate over {@link #getText()} without a null check.
     */
    private List<Word> text = new java.util.ArrayList<>();

    /**
     * One segment of the body text, together with a flag telling whether the
     * segment is a forbidden word.
     */
    public static class Word {
        /**
         * Whether this segment is a forbidden word; {@code false} by default.
         */
        private boolean isForbiddenWord = false;

        /**
         * The segment's content; {@code null} until {@link #setWord(String)} is called.
         */
        private String word;

        /**
         * @return {@code true} if this segment is flagged as a forbidden word
         */
        public boolean isForbiddenWord() {
            return isForbiddenWord;
        }

        /**
         * @param forbiddenWord {@code true} to flag this segment as a forbidden word
         */
        public void setForbiddenWord(boolean forbiddenWord) {
            isForbiddenWord = forbiddenWord;
        }

        /**
         * @return the segment's content, or {@code null} if it was never set
         */
        public String getWord() {
            return word;
        }

        /**
         * @param word the segment's content
         */
        public void setWord(String word) {
            this.word = word;
        }
    }

    /**
     * @return the segments of the body text; never {@code null}
     */
    public List<Word> getText() {
        return text;
    }

    /**
     * Replaces the segment list.
     *
     * @param text the new segments; {@code null} is stored as an empty list so that
     *             {@link #getText()} keeps its non-null guarantee
     */
    public void setText(List<Word> text) {
        this.text = (text != null) ? text : new java.util.ArrayList<>();
    }
}

...

人已经麻了,大脑正在颤抖。不过这个功能自己研究出来之后,对字符串操作的理解会非常深刻,确实非常锻炼自己的编程思维能力,但讲真也非常累。代码可能有 BUG 存在,不想管了,暂时就这样了。这种代码真的只有自己才能看懂……做个记录。