Project:
Mailvisa
Code Location:
git://repo.or.cz/mailvisa.gitmaster
/
Outline
add_messages.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# add_messages.rb - add the words of one or more mail messages to a
# Mailvisa wordlist file.
#
# The wordlist is loaded (or created when the file does not exist yet),
# every message is tokenized and its word counts are merged in, and the
# result is written back to the same file. Messages are read from the
# files named on the command line, or from standard input when none are
# given.

require 'wordlist'
require 'tokenize'

### Defaults

# Default configuration directory. Left nil when HOME is unset so that the
# script can still be used with -c or with an explicit path to the wordlist.
home = ENV['HOME']
confdir = home && (home + '/settings/mailvisa')
# When true, lines starting with "X-Spam" are left out of the analysis.
$ignorespamheaders = true
# Words seen this many times or fewer are discarded when weeding.
$weed_threshold = 1
# Weed whenever the number of distinct words is a multiple of this value
# (0 disables weeding). Doubled by weed when weeding stops paying off.
$weed_count = 100000
filename = nil
messages = []

### Functions

# Tokenizes every line of +stream+ and increments the count of each token
# in the +words+ hash (token => number of occurrences).
#
# Lines starting with "X-Spam" are skipped while $ignorespamheaders is set,
# and tokens longer than 40 characters are ignored. While $weed_count is
# non-zero, the hash is weeded each time its size is a multiple of
# $weed_count.
#
# words::  Hash that is updated in place.
# stream:: any object whose #each yields lines (an IO, for example).
def add_stream(words, stream)
  stream.each do |line|
    next if $ignorespamheaders && line[0, 6] == 'X-Spam'

    # tokenize is expected to come from the 'tokenize' file required above.
    tokenize(line).each do |word|
      next if word.length > 40

      words[word] = (words[word] || 0) + 1
      # weed prunes the hash in place, so its return value is not needed.
      weed words if $weed_count != 0 && words.length % $weed_count == 0
    end
  end
end

# Removes every word whose count is at most $weed_threshold from +words+
# (in place) and reports how many words were discarded on standard output.
#
# When fewer than a tenth of $weed_count words were discarded, $weed_count
# is doubled so that subsequent weeding happens less often.
#
# Returns +words+.
def weed(words)
  puts 'Weeding...'
  before = words.length
  words.delete_if { |_word, count| count <= $weed_threshold }
  weeded = before - words.length
  puts "#{weeded} words discarded"
  $weed_count = $weed_count * 2 if weeded < $weed_count / 10
  words
end

# Prints +message+ followed by the usage line on standard error and
# terminates the program with exit status 0x80.
def usage_error(message, usage)
  $stderr.puts message
  $stderr.puts usage
  exit 0x80
end

# Converts the argument of +option+ to a non-negative integer. A missing,
# non-numeric or negative +value+ is reported as a usage error.
def integer_option(value, option, usage)
  number = begin
    Integer(value)
  rescue ArgumentError, TypeError
    nil
  end
  if number.nil? || number < 0
    usage_error "Option #{option} requires a non-negative number", usage
  end
  number
end

usage = 'USAGE: ' + $0 + ' [options] <wordlist> [<message> ...]'
help = <<EOT
Valid options are:
-c <path>       Look for configuration files in <path>
                (default: $HOME/settings/mailvisa)
-i              Include X-Spam headers in analysis
-w <num>        Weed wordlist every <num> words (default: 100000
                use 0 to disable weeding)
-t <num>        Weed words that occur <num> or fewer times
                (default: 1)
EOT

### Main program

## Process command line

# Work on a copy so ARGV itself is left untouched.
args = ARGV.dup
until args.empty?
  arg = args.shift
  case arg
  when '-h'
    puts usage
    print "\n" + help
    exit
  when '-c'
    confdir = args.shift
    usage_error 'Option -c requires a path', usage if confdir.nil?
  when '-i'
    $ignorespamheaders = false
  when '-w'
    $weed_count = integer_option(args.shift, arg, usage)
  when '-t'
    $weed_threshold = integer_option(args.shift, arg, usage)
  when /\A-/
    usage_error 'Invalid option: ' + arg, usage
  else
    # First non-option argument is the wordlist; everything after it is
    # taken as message files, even if it starts with a dash.
    filename = arg
    messages = args
    break
  end
end

usage_error 'No wordlist specified', usage if filename.nil?

# A wordlist name without any directory part lives in the configuration
# directory.
if filename.index('/').nil?
  if confdir.nil?
    usage_error 'Cannot determine the configuration directory; ' \
                'set HOME or use -c <path>', usage
  end
  filename = confdir + '/' + filename
end

## Load the existing wordlist, if any

# Only a missing file means "start from scratch". Any other failure (for
# example a permission problem) is left to propagate, because carrying on
# would overwrite the existing wordlist with a fresh one.
begin
  fh = File.open(filename)
rescue Errno::ENOENT
  $stderr.puts filename + ' not found, will create new file'
  fh = nil
end

if fh
  $stderr.print "Loading #{filename}..."
  begin
    # load_wordlist is expected to come from the 'wordlist' file required
    # above; its result is indexed with :words and :messages below.
    wordlist = load_wordlist fh
  ensure
    fh.close
  end
  words = wordlist[:words]
  message_count = wordlist[:messages]
  $stderr.puts words.length.to_s + ' words loaded'
else
  words = {}
  message_count = 0
end

## Add the messages

if messages.empty?
  # No message files given: standard input is treated as a single message.
  add_stream words, $stdin
  message_count = message_count + 1
else
  messages.each do |path|
    $stderr.puts "Adding #{path}"
    # File.open (rather than Kernel#open) so that a name starting with '|'
    # is never run as a command; the block form closes the file on errors.
    File.open(path) { |message| add_stream words, message }
    message_count = message_count + 1
  end
end

## Write the updated wordlist

wordlist = { :messages => message_count, :words => words }
$stderr.print "Writing #{filename}..."
File.open(filename, 'w') { |out| dump_wordlist wordlist, out }
$stderr.puts 'done'
