Jump to content

User:Polygnotus/Scripts/Spell.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// <nowiki>
// == Wikipedia Spell Checker ==
// Detects typos in article text using nspell (Hunspell-compatible)
// Adds a "Check spelling" tab to article pages

( function () {
  'use strict';

  // -----------------------------------------------------------------------
  // Configuration — add your own terms to the whitelist
  // -----------------------------------------------------------------------
  // Terms that are always accepted. initChecker lower-cases them before
  // building the lookup set, so the casing used here does not matter.
  const WHITELIST = [
    // Wikimedia project and software names
    'Wikipedia',
    'Wikimedia',
    'MediaWiki',
    'Wikidata',
    'Wikisource',
    'Wiktionary',
    'Wikinews',
    'Wikivoyage',
    'Wikibooks',
    'Wikiquote',

    // Markup and page-structure jargon
    'wikitext',
    'wikilink',
    'wikitable',
    'infobox',
    'navbox',
    'hatnote',
    'portlet',

    // Project shorthand
    'AfD',
    'CSD',
    'BLP',
    'NPOV',
    'POV',
    'WP',
    'MOS',

    // Editing vocabulary
    'disambiguation',
    'redirects',
    'transclusion',

    // Add your own:
  ];

  // -----------------------------------------------------------------------
  // CDN endpoints
  // -----------------------------------------------------------------------
  // Spell-checking engine (ES module build) followed by the two English
  // dictionary files (affix rules, then word list) that it consumes.
  const [ CDN_NSPELL, CDN_AFF, CDN_DIC ] = [
    'https://cdn.jsdelivr.net/npm/nspell/+esm',
    'https://cdn.jsdelivr.net/npm/dictionary-en/index.aff',
    'https://cdn.jsdelivr.net/npm/dictionary-en/index.dic',
  ];

  // Only run on view action in content namespaces
  const VALID_NS = new Set( [ 0, 2, 4, 10, 12, 14, 100 ] );
  if ( !VALID_NS.has( mw.config.get( 'wgNamespaceNumber' ) ) ) return;
  if ( mw.config.get( 'wgAction' ) !== 'view' ) return;

  let checker      = null;
  let whitelistSet = null;
  // Promise for a load that is currently in flight. Shared by concurrent
  // callers and cleared once it settles so a failed load can be retried.
  let checkerLoading = null;

  // -----------------------------------------------------------------------
  // Load nspell + dictionary (cached after first load)
  // -----------------------------------------------------------------------
  /**
   * Lazily create the shared spell checker.
   *
   * The first call downloads the nspell module and both dictionary files in
   * parallel, builds the checker and fills `whitelistSet`. Later calls return
   * the cached instance; calls made while a download is still running share
   * that download instead of starting another one.
   *
   * @returns {Promise<Object>} the nspell instance
   * @throws {Error} when the module or a dictionary file cannot be loaded
   *   (network failure or non-2xx HTTP status); nothing is cached in that case
   */
  async function initChecker() {
    if ( checker ) return checker;
    if ( checkerLoading ) return checkerLoading;

    // fetch() resolves even for 404/500 responses, so the status has to be
    // checked explicitly — otherwise an error page would be parsed as a
    // dictionary.
    const fetchText = async url => {
      const response = await fetch( url );
      if ( !response.ok ) {
        throw new Error( 'Failed to load ' + url + ' (HTTP ' + response.status + ')' );
      }
      return response.text();
    };

    mw.notify( 'Loading spell checker…', { tag: 'spellcheck', autoHide: false } );

    checkerLoading = ( async () => {
      const [ { default: nspell }, affText, dicText ] = await Promise.all( [
        import( CDN_NSPELL ),
        fetchText( CDN_AFF ),
        fetchText( CDN_DIC ),
      ] );

      whitelistSet = new Set( WHITELIST.map( w => w.toLowerCase() ) );
      checker      = nspell( { aff: affText, dic: dicText } );

      mw.notify( 'Spell checker ready.', { tag: 'spellcheck', autoHide: true } );
      return checker;
    } )();

    try {
      return await checkerLoading;
    } catch ( err ) {
      // Same tag as the "Loading…" message, so the sticky notification is
      // replaced instead of staying on screen forever.
      mw.notify(
        'Spell checker could not be loaded: ' + ( err && err.message ? err.message : err ),
        { tag: 'spellcheck', type: 'error', autoHide: true }
      );
      throw err;
    } finally {
      checkerLoading = null;
    }
  }

  // -----------------------------------------------------------------------
  // Walk text nodes, skipping elements that should not be checked
  // -----------------------------------------------------------------------
  // Upper-case tag names (as reported by Element.tagName) whose text is
  // never spell-checked.
  const SKIP_TAGS = new Set( [
    'CODE',
    'PRE',
    'MATH',
    'SCRIPT',
    'STYLE',
    'TEXTAREA',
    'SUP',
  ] );

  // Class names whose elements (and everything inside them) are not checked.
  // Every entry must be a single class token: the list is tested with
  // classList.contains(), which never matches a CSS selector such as
  // 'mw-parser-output > .hatnote', so hatnotes are listed by class instead.
  const SKIP_CLASSES = [
    'mw-editsection', 'reference', 'mw-cite-backlink',
    'noprint', 'mw-headline', 'reflist', 'navbox',
    'infobox', 'wikitable', 'hatnote',
  ];

  /**
   * Collect the text nodes under `root` that are eligible for checking.
   *
   * A text node is dropped when any ancestor between it and `root` (`root`
   * itself excluded) has a tag listed in SKIP_TAGS or a class listed in
   * SKIP_CLASSES.
   *
   * @param {Node} root subtree to scan
   * @returns {Text[]} accepted text nodes, in the order the walker yields them
   */
  function getTextNodes( root ) {
    const isExcluded = el =>
      SKIP_TAGS.has( el.tagName ) ||
      SKIP_CLASSES.some( cls => el.classList.contains( cls ) );

    const filter = {
      acceptNode( textNode ) {
        for (
          let ancestor = textNode.parentElement;
          ancestor && ancestor !== root;
          ancestor = ancestor.parentElement
        ) {
          if ( isExcluded( ancestor ) ) return NodeFilter.FILTER_REJECT;
        }
        return NodeFilter.FILTER_ACCEPT;
      },
    };

    const walker = document.createTreeWalker( root, NodeFilter.SHOW_TEXT, filter );

    const collected = [];
    for ( let current = walker.nextNode(); current; current = walker.nextNode() ) {
      collected.push( current );
    }
    return collected;
  }

  // -----------------------------------------------------------------------
  // Word-level filters — returns true if the word should be skipped
  // -----------------------------------------------------------------------
  /**
   * Decide whether a token should be left out of spell checking.
   *
   * Reads the module-level `whitelistSet`, so it must only be called after
   * initChecker() has resolved.
   *
   * @param {string} word token with surrounding apostrophes already stripped
   * @returns {boolean} true when the token must not be spell-checked
   */
  function shouldIgnore( word ) {
    if ( word.length <= 2 )                        return true; // too short
    if ( /\d/.test( word ) )                       return true; // contains digit
    if ( word === word.toUpperCase() )             return true; // ALL CAPS abbreviation (or caseless script)
    // Unicode-aware so that names such as "Émile" count as capitalised too.
    if ( /^[\p{Lu}\p{Lt}]/u.test( word ) )         return true; // starts with capital → likely proper noun
    if ( /^https?/.test( word ) )                  return true; // stray URL fragment
    if ( /^[-']+$/.test( word ) )                  return true; // punctuation only
    if ( !/[aeiou]/i.test( word ) )               return true; // no vowels → abbreviation
    if ( whitelistSet.has( word.toLowerCase() ) ) return true; // whitelisted
    return false;
  }

  // -----------------------------------------------------------------------
  // Extract word tokens with their character offsets
  // -----------------------------------------------------------------------
  /**
   * Split text into word tokens.
   *
   * Tokens are runs of letters (any script), combining marks and apostrophes,
   * so accented words such as "café" stay in one piece instead of being cut
   * at the first non-ASCII letter.
   *
   * @param {string} text text to tokenise
   * @returns {{word: string, index: number, raw: string}[]} one entry per token:
   *   `raw` is the exact matched text, `index` its offset in `text` (UTF-16
   *   units, as used by DOM ranges), and `word` the form to look up — the
   *   typographic apostrophe (U+2019) is replaced by `'` and leading/trailing
   *   apostrophes are removed
   */
  function extractWords( text ) {
    return [ ...text.matchAll( /[\p{L}\p{M}'\u2019]+/gu ) ].map( m => ( {
      word: m[ 0 ].replace( /\u2019/g, "'" ).replace( /^'+|'+$/g, '' ),
      index: m.index,
      raw: m[ 0 ],
    } ) );
  }

  // -----------------------------------------------------------------------
  // Wrap a word in a text node with a highlight span
  // -----------------------------------------------------------------------
  /**
   * Wrap part of a text node in a red-underlined <span> whose tooltip lists
   * the suggested corrections.
   *
   * @param {Text} textNode text node containing the word
   * @param {number} index offset of the first character to wrap
   * @param {number} length number of characters to wrap
   * @param {string[]} suggestions candidate corrections (may be empty)
   * @returns {HTMLSpanElement} the span that now encloses the word
   */
  function highlightTypo( textNode, index, length, suggestions ) {
    const wordRange = document.createRange();
    wordRange.setStart( textNode, index );
    wordRange.setEnd( textNode, index + length );

    let tooltip = 'No suggestions found';
    if ( suggestions.length ) {
      tooltip = 'Suggestions: ' + suggestions.join( ', ' );
    }

    const marker = Object.assign( document.createElement( 'span' ), {
      className: 'mw-spellcheck-typo',
      title: tooltip,
    } );
    marker.style.cssText = 'border-bottom: 2px solid red; cursor: help;';

    wordRange.surroundContents( marker );
    return marker;
  }

  // -----------------------------------------------------------------------
  // Main spell check routine
  // -----------------------------------------------------------------------
  /**
   * Spell-check the rendered page content and underline every suspect word.
   *
   * Steps: load (or reuse) the checker, remove highlights left by a previous
   * run, collect the eligible text nodes, look up every token and wrap the
   * unknown ones via highlightTypo(), then report the total in a notification.
   *
   * Failures are logged and reported to the user instead of being thrown, so
   * the returned promise always fulfils.
   *
   * @returns {Promise<void>}
   */
  async function runSpellCheck() {
    try {
      const spell   = await initChecker();
      const content = document.getElementById( 'mw-content-text' );
      if ( !content ) return;

      // Clear previous highlights, then merge the text fragments they left
      // behind so words are tokenised from whole text nodes again.
      content.querySelectorAll( '.mw-spellcheck-typo' ).forEach( el => {
        el.replaceWith( ...el.childNodes );
      } );
      content.normalize();

      // word → null (spelled correctly) or its suggestion list. The same word
      // often occurs many times on a page, and both lookups give the same
      // answer each time, so each distinct word is looked up once per run.
      const verdicts = new Map();
      let typoCount  = 0;

      for ( const node of getTextNodes( content ) ) {
        const words = extractWords( node.nodeValue );

        // Process in reverse order so splitting the node doesn't invalidate
        // the indices of earlier words in the same node
        for ( let i = words.length - 1; i >= 0; i-- ) {
          const { word, index, raw } = words[ i ];

          if ( !word || shouldIgnore( word ) ) continue;

          let suggestions = verdicts.get( word );
          if ( suggestions === undefined ) {
            suggestions = spell.correct( word ) ? null : spell.suggest( word ).slice( 0, 5 );
            verdicts.set( word, suggestions );
          }
          if ( suggestions === null ) continue;

          try {
            highlightTypo( node, index, raw.length, suggestions );
            typoCount++;
          } catch ( err ) {
            // Best effort: one word that cannot be wrapped must not abort the
            // whole run, but leave a trace for debugging.
            console.warn( 'Spell checker: could not highlight "' + raw + '"', err );
          }
        }
      }

      mw.notify(
        'Spell check complete — ' + typoCount + ' possible typo' + ( typoCount === 1 ? '' : 's' ) + ' found.',
        { tag: 'spellcheck', autoHide: true }
      );
    } catch ( err ) {
      console.error( 'Spell check failed', err );
      // Same tag as the progress messages, so a stuck "Loading…" notice is
      // replaced by the error.
      mw.notify(
        'Spell check failed: ' + ( err && err.message ? err.message : err ),
        { tag: 'spellcheck', type: 'error', autoHide: true }
      );
    }
  }

  // -----------------------------------------------------------------------
  // Add tab to article actions
  // -----------------------------------------------------------------------
  // The portlet the link is added to only exists once the page has been
  // parsed, so wait for the DOM as well as for the util module; otherwise
  // addPortletLink() can return null and the tab silently never appears.
  const domReady = new Promise( resolve => {
    if ( document.readyState === 'loading' ) {
      document.addEventListener( 'DOMContentLoaded', () => resolve(), { once: true } );
    } else {
      resolve();
    }
  } );

  Promise.all( [ mw.loader.using( 'mediawiki.util' ), domReady ] ).then( () => {
    const link = mw.util.addPortletLink(
      'p-cactions',
      '#',
      'Check spelling',
      'ca-spellcheck',
      'Check this article for spelling errors'
    );
    if ( !link ) return;

    link.addEventListener( 'click', e => {
      e.preventDefault();
      // Never leave the promise floating: surface any failure to the user.
      runSpellCheck().catch( err => {
        console.error( 'Spell check failed', err );
        mw.notify(
          'Spell check failed: ' + ( err && err.message ? err.message : err ),
          { tag: 'spellcheck', type: 'error', autoHide: true }
        );
      } );
    } );
  } ).catch( err => {
    console.error( 'Spell checker: could not add the "Check spelling" tab', err );
  } );

}() );
// </nowiki>