比这篇新的文章: 计算今天是一年中的第几周
比这篇旧的文章: truncatehanzi - template filter

在文件里查找某个字符串出现的次数

语言: Java, 标签: 字符串查找 2008/08/08发布 1年前更新
作者: jiyu, 点击909次, 评论(0), 收藏者(0), 打分:

背景
主题: 字体:
001 import java.io.BufferedReader;
002 import java.io.File;
003 import java.io.FileInputStream;
004 import java.io.IOException;
005 import java.io.InputStreamReader;
006 import java.nio.ByteBuffer;
007 import java.nio.CharBuffer;
008 import java.nio.channels.FileChannel;
009 import java.nio.charset.Charset;
010
/**
 * Counts how many times a string occurs in a file, using three different
 * reading strategies so that their running times can be compared:
 * a memory-mapped file decoded in one go, a buffered reader consumed one
 * character at a time, and a buffered reader consumed line by line with
 * {@link String#indexOf(String, int)}.
 *
 * <p>All strategies decode the file with the default charset and count
 * overlapping occurrences (for example {@code "aa"} occurs twice in
 * {@code "aaa"}). An empty or {@code null} pattern is reported as zero
 * occurrences.
 */
public class StringSearcher {

    /** How many times each strategy is executed inside one timing measurement. */
    private static final int RUNS = 1;

    /**
     * Runs the three strategies in turn and prints, for each of them,
     * {@code <elapsed milliseconds>:<occurrence count>} on standard error.
     *
     * @param args {@code args[0]} is the file to scan, {@code args[1]} the string to look for
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.err.println("usage: java StringSearcher <file> <pattern>");
            System.exit(1);
        }
        String file = args[0];
        String pattern = args[1];
        int count = 0;

        long started = System.currentTimeMillis();
        for (int i = 0; i < RUNS; i++) {
            count = searchCountWithNIO(file, pattern);
        }
        System.err.println((System.currentTimeMillis() - started) + ":" + count);

        // Ask for a collection between measurements so that garbage produced by
        // one strategy is less likely to be charged to the next one.
        System.gc();
        started = System.currentTimeMillis();
        for (int i = 0; i < RUNS; i++) {
            count = searchCountWithBufferReader(file, pattern);
        }
        System.err.println((System.currentTimeMillis() - started) + ":" + count);

        System.gc();
        started = System.currentTimeMillis();
        for (int i = 0; i < RUNS; i++) {
            count = indexCountWithBufferReader(file, pattern);
        }
        System.err.println((System.currentTimeMillis() - started) + ":" + count);
    }

    /**
     * Counts occurrences of {@code patten} by memory-mapping the whole file,
     * decoding it with the default charset and running a KMP scan over the
     * decoded characters.
     *
     * <p>Failures are handled best-effort: the exception is printed to standard
     * error and the number of occurrences found so far is returned.
     *
     * @param fileName path of the file to scan
     * @param patten   string to look for; {@code null} or empty yields 0
     * @return number of (possibly overlapping) occurrences
     */
    public static int searchCountWithNIO(String fileName, String patten) {
        int count = 0;
        if (patten == null || patten.length() == 0) {
            return count;
        }
        FileInputStream inputStream = null;
        try {
            inputStream = new FileInputStream(new File(fileName));
            FileChannel channel = inputStream.getChannel();
            ByteBuffer buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0,
                    channel.size());
            CharBuffer text = Charset.defaultCharset().decode(buffer);

            char[] a = patten.toCharArray();
            int[] next = KMP_getNext(a);
            int restart = stateAfterFullMatch(a, next);
            int state = 0;
            // Walk the buffer between position and limit only: its backing
            // array may be longer than the decoded text.
            while (text.hasRemaining()) {
                state = step(a, next, state, text.get());
                if (state == a.length) {
                    count++;
                    state = restart;
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeIfOpened(inputStream);
        }
        return count;
    }

    /**
     * Counts occurrences of {@code patten} by reading the file one character at
     * a time through a {@link BufferedReader} and feeding each character to a
     * KMP matcher.
     *
     * <p>Failures are handled best-effort: the exception is printed to standard
     * error and the number of occurrences found so far is returned.
     *
     * @param fileName path of the file to scan
     * @param patten   string to look for; {@code null} or empty yields 0
     * @return number of (possibly overlapping) occurrences
     */
    public static int searchCountWithBufferReader(String fileName, String patten) {
        int count = 0;
        if (patten == null || patten.length() == 0) {
            return count;
        }
        FileInputStream inputStream = null;
        try {
            inputStream = new FileInputStream(new File(fileName));
            BufferedReader bufferedReader =
                    new BufferedReader(new InputStreamReader(inputStream));

            char[] a = patten.toCharArray();
            int[] next = KMP_getNext(a);
            int restart = stateAfterFullMatch(a, next);
            int state = 0;
            int c;
            while ((c = bufferedReader.read()) >= 0) {
                state = step(a, next, state, (char) c);
                if (state == a.length) {
                    count++;
                    state = restart;
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeIfOpened(inputStream);
        }
        return count;
    }

    /**
     * Counts occurrences of {@code patten} by reading the file line by line and
     * searching every line with {@link String#indexOf(String, int)}.
     *
     * <p>Because lines are searched independently and line terminators are
     * stripped by {@link BufferedReader#readLine()}, an occurrence that spans a
     * line break is not counted by this strategy.
     *
     * <p>Failures are handled best-effort: the exception is printed to standard
     * error and the number of occurrences found so far is returned.
     *
     * @param fileName path of the file to scan
     * @param patten   string to look for; {@code null} or empty yields 0
     * @return number of (possibly overlapping) occurrences
     */
    public static int indexCountWithBufferReader(String fileName, String patten) {
        int count = 0;
        // indexOf("", n) never returns a negative value, so an empty pattern
        // would otherwise keep the inner loop spinning forever.
        if (patten == null || patten.length() == 0) {
            return count;
        }
        FileInputStream inputStream = null;
        try {
            inputStream = new FileInputStream(new File(fileName));
            BufferedReader bufferedReader =
                    new BufferedReader(new InputStreamReader(inputStream));

            String line;
            while ((line = bufferedReader.readLine()) != null) {
                int start = line.indexOf(patten);
                while (start >= 0) {
                    count++;
                    // Resume one character later so overlapping hits are counted.
                    start = line.indexOf(patten, start + 1);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeIfOpened(inputStream);
        }
        return count;
    }

    /**
     * Builds the KMP failure table for a pattern.
     *
     * <p>{@code next[0]} is {@code -1}; for {@code i > 0}, {@code next[i]} is the
     * length of the longest proper prefix of {@code inStr[0..i-1]} that is also
     * a suffix of it.
     *
     * @param inStr pattern characters
     * @return table with one entry per pattern character (empty for an empty pattern)
     */
    public static int[] KMP_getNext(char[] inStr) {
        int[] next = new int[inStr.length];
        if (inStr.length == 0) {
            return next;
        }
        int i = 0;
        int j = -1;
        next[0] = j;

        while (i < inStr.length - 1) {
            if (j == -1 || inStr[i] == inStr[j]) {
                i++;
                j++;
                next[i] = j;
            } else {
                j = next[j];
            }
        }
        return next;
    }

    /**
     * Advances the matcher by one input character.
     *
     * @param a     pattern characters
     * @param next  failure table produced by {@link #KMP_getNext(char[])}
     * @param state number of pattern characters currently matched; must be less than {@code a.length}
     * @param c     next character of the text
     * @return number of pattern characters matched after consuming {@code c};
     *         {@code a.length} signals a complete occurrence
     */
    private static int step(char[] a, int[] next, int state, char c) {
        int j = state;
        // Fall back through shorter and shorter borders until one can be
        // extended by c, or until nothing is left (j == -1).
        while (j >= 0 && a[j] != c) {
            j = next[j];
        }
        return j + 1;
    }

    /**
     * Computes the state the matcher must resume from right after a complete
     * occurrence: the length of the longest proper border of the whole pattern.
     * The table from {@link #KMP_getNext(char[])} stops one entry short of this
     * value, so it is derived here with one more failure-function step.
     */
    private static int stateAfterFullMatch(char[] a, int[] next) {
        int last = a.length - 1;
        int j = next[last];
        while (j >= 0 && a[j] != a[last]) {
            j = next[j];
        }
        return j + 1;
    }

    /** Closes the stream if it was opened; a failure to close is only reported. */
    private static void closeIfOpened(FileInputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}


所有评论,共0条:( 我也来说两句)


发表评论

注册登录后再发表评论