PHPXRef 0.7.1 : MediaWiki-1.24.0 : /extensions/ConfirmEdit/captcha.py source

[Summary view] [Print] [Text view]
   1  #!/usr/bin/python
   2  #
   3  # Script to generate distorted text images for a captcha system.
   4  #
   5  # Copyright (C) 2005 Neil Harris
   6  #
   7  # This program is free software; you can redistribute it and/or modify
   8  # it under the terms of the GNU General Public License as published by
   9  # the Free Software Foundation; either version 2 of the License, or
  10  # (at your option) any later version.
  11  #
  12  # This program is distributed in the hope that it will be useful,
  13  # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15  # GNU General Public License for more details.
  16  #
  17  # You should have received a copy of the GNU General Public License along
  18  # with this program; if not, write to the Free Software Foundation, Inc.,
  19  # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20  # http://www.gnu.org/copyleft/gpl.html
  21  #
  22  # Further tweaks by Brion Vibber <[email protected]>:
  23  # 2006-01-26: Add command-line options for the various parameters
  24  # 2007-02-19: Add --dirs param for hash subdirectory splits
  25  # Tweaks by Greg Sabino Mullane <[email protected]>:
  26  # 2008-01-06: Add regex check to skip words containing other than a-z
  27  
  28  import random
  29  import math
  30  import hashlib
  31  from optparse import OptionParser
  32  import os
  33  import sys
  34  import re
  35  
  36  try:
  37      import Image
  38      import ImageFont
  39      import ImageDraw
  40      import ImageEnhance
  41      import ImageOps
  42  except:
  43      sys.exit("This script requires the Python Imaging Library - http://www.pythonware.com/products/pil/")
  44  
  45  nonalpha = re.compile('[^a-z]') # regex to test for suitability of words
  46  
  47  # Does X-axis wobbly copy, sandwiched between two rotates
  48  def wobbly_copy(src, wob, col, scale, ang):
  49      x, y = src.size
  50      f = random.uniform(4*scale, 5*scale)
  51      p = random.uniform(0, math.pi*2)
  52      rr = ang+random.uniform(-30, 30) # vary, but not too much
  53      int_d = Image.new('RGB', src.size, 0) # a black rectangle
  54      rot = src.rotate(rr, Image.BILINEAR)
  55      # Do a cheap bounding-box op here to try to limit work below
  56      bbx = rot.getbbox()
  57      if bbx == None:
  58          return src
  59      else:
  60          l, t, r, b= bbx
  61      # and only do lines with content on
  62      for i in range(t, b+1):
  63          # Drop a scan line in
  64          xoff = int(math.sin(p+(i*f/y))*wob)
  65          xoff += int(random.uniform(-wob*0.5, wob*0.5))
  66          int_d.paste(rot.crop((0, i, x, i+1)), (xoff, i))
  67      # try to stop blurring from building up
  68      int_d = int_d.rotate(-rr, Image.BILINEAR)
  69      enh = ImageEnhance.Sharpness(int_d)
  70      return enh.enhance(2)
  71  
  72  
  73  def gen_captcha(text, fontname, fontsize, file_name):
  74      """Generate a captcha image"""
  75      # white text on a black background
  76      bgcolor = 0x0
  77      fgcolor = 0xffffff
  78      # create a font object 
  79      font = ImageFont.truetype(fontname,fontsize)
  80      # determine dimensions of the text
  81      dim = font.getsize(text)
  82      # create a new image significantly larger that the text
  83      edge = max(dim[0], dim[1]) + 2*min(dim[0], dim[1])
  84      im = Image.new('RGB', (edge, edge), bgcolor)
  85      d = ImageDraw.Draw(im)
  86      x, y = im.size
  87      # add the text to the image
  88      d.text((x/2-dim[0]/2, y/2-dim[1]/2), text, font=font, fill=fgcolor)
  89      k = 3
  90      wob = 0.20*dim[1]/k
  91      rot = 45
  92      # Apply lots of small stirring operations, rather than a few large ones
  93      # in order to get some uniformity of treatment, whilst
  94      # maintaining randomness
  95      for i in range(k):
  96          im = wobbly_copy(im, wob, bgcolor, i*2+3, rot+0)
  97          im = wobbly_copy(im, wob, bgcolor, i*2+1, rot+45)
  98          im = wobbly_copy(im, wob, bgcolor, i*2+2, rot+90)
  99          rot += 30
 100      
 101      # now get the bounding box of the nonzero parts of the image
 102      bbox = im.getbbox()
 103      bord = min(dim[0], dim[1])/4 # a bit of a border
 104      im = im.crop((bbox[0]-bord, bbox[1]-bord, bbox[2]+bord, bbox[3]+bord))
 105      # and turn into black on white
 106      im = ImageOps.invert(im)
 107          
 108      # save the image, in format determined from filename
 109      im.save(file_name)
 110  
 111  def gen_subdir(basedir, md5hash, levels):
 112      """Generate a subdirectory path out of the first _levels_
 113      characters of _hash_, and ensure the directories exist
 114      under _basedir_."""
 115      subdir = None
 116      for i in range(0, levels):
 117          char = md5hash[i]
 118          if subdir:
 119              subdir = os.path.join(subdir, char)
 120          else:
 121              subdir = char
 122          fulldir = os.path.join(basedir, subdir)
 123          if not os.path.exists(fulldir):
 124              os.mkdir(fulldir)
 125      return subdir
 126  
 127  def try_pick_word(words, blacklist, verbose, nwords, min_length, max_length):
 128      if words is not None:
 129          word = words[random.randint(0,len(words)-1)]
 130          while nwords > 1:
 131              word2 = words[random.randint(0,len(words)-1)]
 132              word = word + word2
 133              nwords = nwords - 1
 134      else:
 135          word = ''
 136          max_length = max_length if max_length > 0 else 10
 137          for i in range(0, random.randint(min_length, max_length)):
 138              word = word + chr(97 + random.randint(0,25))
 139  
 140      if verbose:
 141          print "word is %s" % word
 142  
 143      if len(word) < min_length:
 144          if verbose:
 145              print "skipping word pair '%s' because it has fewer than %d characters" % (word, min_length)
 146          return None
 147  
 148      if max_length > 0 and len(word) > max_length:
 149          if verbose:
 150              print "skipping word pair '%s' because it has more than %d characters" % (word, max_length)
 151          return None
 152  
 153      if nonalpha.search(word):
 154          if verbose:
 155              print "skipping word pair '%s' because it contains non-alphabetic characters" % word
 156          return None
 157  
 158      for naughty in blacklist:
 159          if naughty in word:
 160              if verbose:
 161                  print "skipping word pair '%s' because it contains blacklisted word '%s'" % (word, naughty)
 162              return None
 163      return word
 164  
 165  def pick_word(words, blacklist, verbose, nwords, min_length, max_length):
 166      for x in range(1000): # If we can't find a valid combination in 1000 tries, just give up
 167          word = try_pick_word(words, blacklist, verbose, nwords, min_length, max_length)
 168          if word:
 169              return word
 170      sys.exit("Unable to find valid word combinations")
 171  
 172  def read_wordlist(filename):
 173      f = open(filename)
 174      words = [x.strip().lower() for x in f.readlines()]
 175      f.close()
 176      return words
 177  
 178  if __name__ == '__main__':
 179      """This grabs random words from the dictionary 'words' (one
 180      word per line) and generates a captcha image for each one,
 181      with a keyed salted hash of the correct answer in the filename.
 182      
 183      To check a reply, hash it in the same way with the same salt and
 184      secret key, then compare with the hash value given.
 185      """
 186      script_dir = os.path.dirname(os.path.realpath(__file__))
 187      parser = OptionParser()
 188      parser.add_option("--wordlist", help="A list of words (required)", metavar="WORDS.txt")
 189      parser.add_option("--random", help="Use random charcters instead of a wordlist", action="store_true")
 190      parser.add_option("--key", help="The passphrase set as $wgCaptchaSecret (required)", metavar="KEY")
 191      parser.add_option("--output", help="The directory to put the images in - $wgCaptchaDirectory (required)", metavar="DIR")
 192      parser.add_option("--font", help="The font to use (required)", metavar="FONT.ttf")
 193      parser.add_option("--font-size", help="The font size (default 40)", metavar="N", type='int', default=40)
 194      parser.add_option("--count", help="The maximum number of images to make (default 20)", metavar="N", type='int', default=20)
 195      parser.add_option("--blacklist", help="A blacklist of words that should not be used", metavar="FILE", default=os.path.join(script_dir, "blacklist"))
 196      parser.add_option("--fill", help="Fill the output directory to contain N files, overrides count, cannot be used with --dirs", metavar="N", type='int')
 197      parser.add_option("--dirs", help="Put the images into subdirectories N levels deep - $wgCaptchaDirectoryLevels", metavar="N", type='int')
 198      parser.add_option("--verbose", "-v", help="Show debugging information", action='store_true')
 199      parser.add_option("--number-words", help="Number of words from the wordlist which make a captcha challenge (default 2)", type='int', default=2)
 200      parser.add_option("--min-length", help="Minimum length for a captcha challenge", type='int', default=1)
 201      parser.add_option("--max-length", help="Maximum length for a captcha challenge", type='int', default=-1)
 202      
 203      opts, args = parser.parse_args()
 204  
 205      if opts.wordlist:
 206          wordlist = opts.wordlist
 207      elif opts.random:
 208          wordlist = None
 209      else:
 210          sys.exit("Need to specify a wordlist")
 211      if opts.key:
 212          key = opts.key
 213      else:
 214          sys.exit("Need to specify a key")
 215      if opts.output:
 216          output = opts.output
 217      else:
 218          sys.exit("Need to specify an output directory")
 219      if opts.font and os.path.exists(opts.font):
 220          font = opts.font
 221      else:
 222          sys.exit("Need to specify the location of a font")
 223  
 224      blacklist = read_wordlist(opts.blacklist)
 225      count = opts.count
 226      fill = opts.fill
 227      dirs = opts.dirs
 228      verbose = opts.verbose
 229      fontsize = opts.font_size
 230  
 231      if fill:
 232          count = max(0, fill - len(os.listdir(output)))
 233  
 234      words = None
 235      if wordlist:
 236          words = read_wordlist(wordlist)
 237          words = [x for x in words
 238              if len(x) in (4,5) and x[0] != "f"
 239              and x[0] != x[1] and x[-1] != x[-2]]
 240  
 241      for i in range(count):
 242          word = pick_word(words, blacklist, verbose, opts.number_words, opts.min_length, opts.max_length)
 243          salt = "%08x" % random.randrange(2**32)
 244          # 64 bits of hash is plenty for this purpose
 245          md5hash = hashlib.md5(key+salt+word+key+salt).hexdigest()[:16]
 246          filename = "image_%s_%s.png" % (salt, md5hash)
 247          if dirs:
 248              subdir = gen_subdir(output, md5hash, dirs)
 249              filename = os.path.join(subdir, filename)
 250          if verbose:
 251              print filename
 252          gen_captcha(word, font, fontsize, os.path.join(output, filename))
 253
PHP Cross Reference of MediaWiki-1.24.0

/extensions/ConfirmEdit/ -> captcha.py (source)