// Note: After saving, you may have to bypass your browser's cache to see the changes. In Google Chrome, Firefox, Microsoft Edge and Safari, hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
// This script shows, if found, the kanji and kana for an article
// It then calls another script, bindKana.js, to clean up the display of ruby
// For configuration, please see the documentation
// License: CC0
/**
 * Entry point of the script.
 *
 * Bails out unless the user is viewing the current revision of a
 * non-main-page article on a wiki whose content language is not Japanese and
 * the page has a Wikidata item. Otherwise it appends an empty `#kanjiInfo`
 * container to the page heading and asks Wikidata for the item's Japanese
 * label, handing the reply to `parseJaLabel`.
 */
function setup() {
    // If we're not reading an article, do nothing
    if (mw.config.get( 'wgAction' ) !== 'view') { return; }
    if (!mw.config.get( 'wgIsArticle' )) { return; }
    // An `oldid=` value in the query string means an old revision is shown
    if (location.search.split('oldid=')[1]) { return; }
    if (mw.config.get("wgIsMainPage")) { return; }
    if (mw.config.get("wgContentLanguage") === "ja") { return; }

    // Assuming that if there's no wikidata, there're no 1:1 interlanguage
    // links, and we don't want cases where a page links to a subsection of a
    // jawiki article
    if (wikidataId === null) { return; }

    // Locate the heading element; the selector depends on the skin
    let heading = $('#firstHeading'); // Vector
    if (!heading.length) {
        heading = $('.page-heading'); // Minerva
    }
    if (!heading.length) {
        console.error("showKanji.js: Couldn't find a page heading. This skin ("
            + mw.config.get( 'skin' ) + ") might not be supported.");
        return;
    }

    // Placeholder so other elements don't push it down later
    heading.append("<div id='kanjiInfo' lang='ja' dir='ltr'></div>");

    // Get the Japanese label from wikidata
    // API docs: https://www.wikidata.org/w/api.php?action=help&modules=wbgetentities
    const labelQuery = {
        action: "wbgetentities",
        ids: wikidataId,
        props: "labels",
        languages: "ja",
        format: "json",
        origin: "*"
    };
    $.ajax({
        url: "https://www.wikidata.org/w/api.php",
        data: labelQuery,
        success: parseJaLabel
    });
}
/**
 * Success handler for the `wbgetentities` request issued by `setup`.
 *
 * Reads the Japanese label of the current item out of the reply. When one
 * exists it is normalised with `toHalfWidth`, the regexes are built from it,
 * it is displayed, and - unless it consists only of kana - a reading is
 * requested. A reply without a usable label (API error payload, missing
 * entity, no `ja` label) is ignored instead of throwing.
 *
 * @param {Object} response Parsed JSON reply of the Wikidata API.
 */
function parseJaLabel(response) {
    // Walk the reply defensively: an error payload carries no `entities`, and
    // an entity may come back without `labels` or without a `ja` entry.
    var entities = response && response.entities;
    var wikidataInfo = entities && entities[wikidataId];
    var labels = wikidataInfo && wikidataInfo.labels;
    var jaEntry = labels && labels.ja;
    var jaLabel = jaEntry && jaEntry.value;
    if (!jaLabel) {
        return;
    }

    jaLabel = jaLabel.toHalfWidth();
    buildRegexes(jaLabel);
    displayKanji(jaLabel);

    // If the japanese title is not just only kana, get the reading
    if (!kanjiRegexes.kanaOnly.test(jaLabel)) {
        requestKana();
    }
}
/**
 * Populates the shared `kanjiRegexes` object with patterns tailored to the
 * given Japanese title:
 *   - latinOnly:    title/reading made of Latin letters, digits and punctuation
 *   - kanaOnly:     kana plus the title's own punctuation
 *   - hiraganaOnly: hiragana, Latin letters and the title's own punctuation
 *   - katakanaOnly: katakana, Latin letters and the title's own punctuation
 *   - lead:         the title followed by an opening parenthesis, capturing
 *                   the reading that follows it
 *
 * @param {string} kanji The Japanese title the patterns are built around.
 */
function buildRegexes(kanji) {
    // Strip $kanji of all kanji and kana, adding whatever is left to the regex
    var reKanjiKana = /[\u3400-\u4DB5\u4E00-\u9FCB\uF900-\uFA6Aぁ-ゔァ-ヴー-]/g;
    var kanjiStripped = kanji.replace(reKanjiKana, "");
    // The leftover characters are spliced into character classes below, so the
    // ones that are special inside a class have to be escaped. Otherwise a
    // "]" would close the class early and a "\" would start an escape with
    // whatever follows it.
    kanjiStripped = kanjiStripped.replace(/[\\\]^]/g, "\\$&");
    kanjiStripped += " ";
    // Need to add hyphen escaped since it has special behavior in regex classes
    kanjiStripped += "\\-";
    // Same leftovers without word characters, for the script-detection classes
    var kanjiAuxiliary = kanjiStripped.replace(/\w/g, "");

    kanjiRegexes.latinOnly = /^[A-Za-z0-9\-.?!/,:;@#$%&+=*'"・ ]+$/;
    kanjiRegexes.kanaOnly = new RegExp("^[ぁ-ゔァ-ヴー" + kanjiAuxiliary + "]+$");
    kanjiRegexes.hiraganaOnly = new RegExp("^[ぁ-ゔーA-Za-z" + kanjiAuxiliary + "]+$");
    kanjiRegexes.katakanaOnly = new RegExp("^[ァ-ヴーA-Za-z" + kanjiAuxiliary + "]+$");

    // Add midpoint for Latin in titles
    if (/\w/.test(kanji)) { kanjiStripped += "・"; }
    var leadReBase = "([ぁ-ゔァ-ヴー" + kanjiStripped + "]+)";

    var kanjiEscaped = mw.util.escapeRegExp(kanji);
    // Account for spaces, but ignore backslash and other misc characters
    var reKanjiKanaLatin = /([\u3400-\u4DB5\u4E00-\u9FCB\uF900-\uFA6Aぁ-ゔァ-ヴーA-Za-z0-9])/g;
    var kanjiSpaced = kanjiEscaped.replace(/ /g, " ?");
    kanjiSpaced = kanjiSpaced.replace(reKanjiKanaLatin, "$1 ?");

    // Add kanji to regex to make sure we're not getting the reading of some
    // other term
    kanjiRegexes.lead = new RegExp(kanjiSpaced + "[^(\n)]*?\\(" + leadReBase, "i"); // brittle
}
/**
 * Shows the Japanese title inside the `#kanjiInfo` container and remembers it
 * in the shared `wikidataKanji` variable for the later lookup steps.
 *
 * The title is inserted as text, never as markup: it comes from a Wikidata
 * label, which anyone can edit, so it must not be interpreted as HTML.
 *
 * A class and tooltip describing the script of the title are added so users
 * can style or hide certain kinds of titles; Latin-only titles are hidden.
 *
 * @param {string} kanji The Japanese title to display.
 */
function displayKanji(kanji) {
    wikidataKanji = kanji;

    var info = $("#kanjiInfo");
    info.append($("<ruby>").text(kanji));

    // Add some classes so users can choose to not display for example
    // katakana-only kanji in their CSS
    if (kanjiRegexes.latinOnly.test(kanji)) {
        info.addClass("kanjiInfo-latin-only");
        info.prop("title", "Japanese title in Latin script");
        info.css("display", "none");
    } else if (kanjiRegexes.hiraganaOnly.test(kanji)) {
        info.addClass("kanjiInfo-hiragana-only");
        info.prop("title", "Japanese title in hiragana");
    } else if (kanjiRegexes.katakanaOnly.test(kanji)) {
        info.addClass("kanjiInfo-katakana-only");
        info.prop("title", "Japanese title in katakana");
    } else {
        info.prop("title", "Japanese title in kanji");
    }
}
/**
 * Asks Wikidata for every claim of the current item and passes the reply to
 * `parseKanaClaim`.
 */
function requestKana() {
    // API docs: https://www.wikidata.org/w/api.php?action=help&modules=wbgetclaims
    // We have to wholesale get all the claims instead of just one because the
    // kana might be present as a qualifier to another claim
    const claimsQuery = {
        action: "wbgetclaims",
        entity: wikidataId,
        format: "json",
        origin: "*"
    };

    $.ajax({
        url: "https://www.wikidata.org/w/api.php",
        data: claimsQuery,
        success: parseKanaClaim
    });
}
/**
 * Success handler for the `wbgetclaims` request issued by `requestKana`.
 *
 * Looks for a "name in kana" (P1814) value, first as a qualifier on a
 * title-like statement whose text equals the displayed title (spaces
 * ignored), then as a statement of its own. When a reading is found it is
 * displayed and `#kanjiInfo` is tagged with the property it came from;
 * otherwise the lookup falls through to `getInterlanguage`.
 *
 * Every statement of a property is examined, not only the first one, and
 * snaks without a `datavalue` as well as replies without `claims` are
 * skipped instead of throwing.
 *
 * @param {Object} response Parsed JSON reply of the Wikidata API.
 */
function parseKanaClaim(response) {
    var properties = {
        title: "P1476",
        nativeLabel: "P1705",
        officialName: "P1448",
        nameInNativeLanguage: "P1559"
    };
    var nameInKana = "P1814";

    // Value of a snak, or undefined when the snak carries no datavalue
    function snakValue(snak) {
        return snak && snak.datavalue ? snak.datavalue.value : undefined;
    }

    var claims = (response && response.claims) || {};
    var wantedTitle = wikidataKanji.replace(/ /g, "");
    var kana;
    var source; // name of the property the reading came from, used as a CSS suffix

    // Try getting nameInKana as a qualifier to some properties
    var propNames = Object.keys(properties);
    for (var p = 0; p < propNames.length && !kana; p++) {
        var statements = claims[properties[propNames[p]]] || [];
        for (var s = 0; s < statements.length && !kana; s++) {
            var titleValue = snakValue(statements[s].mainsnak);
            var qualifiers = statements[s].qualifiers;
            var kanaSnaks = qualifiers && qualifiers[nameInKana];
            if (!titleValue || typeof titleValue.text !== "string"
                    || !kanaSnaks || !kanaSnaks.length) {
                continue;
            }
            if (titleValue.text.replace(/ /g, "") !== wantedTitle) {
                continue;
            }
            var qualifierValue = snakValue(kanaSnaks[0]);
            if (typeof qualifierValue === "string" && qualifierValue) {
                kana = qualifierValue;
                source = propNames[p];
            }
        }
    }

    // Try getting nameInKana as a general claim
    if (!kana) {
        var kanaStatements = claims[nameInKana] || [];
        for (var k = 0; k < kanaStatements.length; k++) {
            var claimValue = snakValue(kanaStatements[k].mainsnak);
            if (typeof claimValue === "string" && claimValue) {
                kana = claimValue;
                source = "nameInKana";
                break;
            }
        }
    }

    // We couldn't find nameInKana
    if (!kana) {
        getInterlanguage();
        return;
    }

    kana = kana.toHalfWidth();
    displayKana(kana);
    $("#kanjiInfo").addClass("kanjiInfo-wikidata");
    $("#kanjiInfo").addClass("kanjiInfo-wikidata-" + source);
}
/**
 * Queries the local wiki for the current page's Japanese interlanguage link.
 * When one exists, its title (with any `#anchor` removed) is handed to
 * `scrapeKana`; otherwise the lookup continues with `getWiktionary`.
 */
function getInterlanguage() {
    // Reply handler: pick the ja langlink of the current page, if any
    function onLanglinks(response) {
        const currentId = mw.config.get( 'wgArticleId' );
        const page = response.query.pages[currentId];
        const langlinks = page ? page.langlinks : undefined;
        if (!langlinks) {
            getWiktionary();
            return;
        }
        const linkedTitle = langlinks[0]["*"];
        scrapeKana(linkedTitle.replace(/(.*)#.*/, "$1")); // rm anchors
    }

    // Documentation: https://en.wikipedia.org/w/api.php?action=help&modules=query%2Blanglinks
    $.ajax({
        url: location.origin + "/w/api.php",
        data: {
            action: "query",
            format: "json",
            prop: "langlinks",
            lllang: "ja",
            titles: mw.config.get( 'wgTitle' )
        },
        success: onLanglinks
    });
}
/**
 * Requests the plain-text start of the Japanese Wikipedia article's intro
 * (limited to two sentences, following redirects) and hands the reply to
 * `getFirstSentence`.
 *
 * @param {string} jaLabel Title of the Japanese Wikipedia article.
 */
function scrapeKana(jaLabel) {
    // API docs: https://www.mediawiki.org/w/api.php?action=help&modules=query%2Bextracts
    const extractQuery = {
        action: "query",
        prop: "extracts",
        format: "json",
        redirects: true,
        exintro: true,
        exsentences: 2,
        exlimit: 1,
        explaintext: true,
        titles: jaLabel,
        origin: "*"
    };

    $.ajax({
        url: "https://ja.wikipedia.org/w/api.php",
        data: extractQuery,
        success: getFirstSentence
    });
}
/**
 * Success handler for the extract request issued by `scrapeKana`.
 *
 * Runs `kanjiRegexes.lead` over the half-width-normalised extract to pull out
 * the reading written in parentheses after the title. The reading is shown
 * and `#kanjiInfo` is tagged as sourced from jawiki, unless it turns out to
 * be Latin, or katakana-only for a non-Latin title. In every failure case the
 * lookup continues with `getWiktionary`.
 *
 * @param {Object} response Parsed JSON reply of the Japanese Wikipedia API.
 */
function getFirstSentence(response) {
    const pages = response.query.pages;
    // The jawiki page id is unknown in advance, so take the first key
    const firstPageId = Object.keys(pages)[0];
    const leadText = pages[firstPageId].extract;
    if (!leadText) {
        console.error("showKanji.js: TextExtracts failed to get a lead for the Japanese article.");
        getWiktionary();
        return;
    }

    // Expect exactly the full match plus the single captured reading
    const found = leadText.toHalfWidth().match(kanjiRegexes.lead);
    if (!found || found.length != 2) {
        getWiktionary();
        return;
    }

    // Rm trailing characters
    const reading = found[1].replace(/[・、 ]$/, "");

    // Abort if our reading is only katakana (for non-Latin) or Latin
    const titleIsLatin = kanjiRegexes.latinOnly.test(wikidataKanji);
    const katakanaForNonLatin = !titleIsLatin && kanjiRegexes.katakanaOnly.test(reading);
    if (katakanaForNonLatin || kanjiRegexes.latinOnly.test(reading)) {
        getWiktionary();
        return;
    }

    displayKana(reading);
    $("#kanjiInfo").addClass("kanjiInfo-jawiki");
}
// Adapted from:
// http://ilog4.blogspot.com/2015/09/javascript-convert-full-width-and-half.html
// https://stackoverflow.com/a/20488304/1995949
// https://en.wikipedia.org/wiki/Halfwidth_and_fullwidth_forms
/**
 * Returns a copy of the string in which fullwidth ASCII variants
 * (U+FF01-U+FF5E) are replaced by their ordinary ASCII counterparts and the
 * ideographic space (U+3000) by a regular space. Other characters are left
 * untouched.
 *
 * @returns {string} The normalised string.
 */
String.prototype.toHalfWidth = function() {
    // The fullwidth forms sit at a fixed distance from ASCII 0x21-0x7E
    var fullWidthOffset = 0xFEE0;
    var halfWidth = this.replace(/[\uff01-\uff5e]/g, function(ch) {
        return String.fromCharCode(ch.charCodeAt(0) - fullWidthOffset);
    });
    // Written as an escape so the ideographic space cannot be mistaken for
    // (or silently turned into) a plain space by an editor
    halfWidth = halfWidth.replace(/\u3000/g, " ");
    return halfWidth;
};
// We use the English Wiktionary because it has more terms and better structure
/**
 * Asks the English Wiktionary for the section list of the entry named after
 * the displayed title and passes the reply to `findJapaneseSection`.
 */
function getWiktionary() {
    // API docs: https://en.wikipedia.org/w/api.php?action=help&modules=parse
    const sectionsQuery = {
        action: "parse",
        format: "json",
        page: wikidataKanji,
        prop: "sections",
        origin: "*"
    };

    $.ajax({
        url: "https://en.wiktionary.org/w/api.php",
        data: sectionsQuery,
        success: findJapaneseSection
    });
}
/**
 * Success handler for the section-list request issued by `getWiktionary`.
 *
 * Finds the first section whose heading is "Japanese" and requests the
 * rendered HTML of just that section, passing the reply to
 * `parseWiktionary`. Does nothing when the API reported an error or the
 * entry has no such section.
 *
 * @param {Object} response Parsed JSON reply of the Wiktionary API.
 */
function findJapaneseSection(response) {
    if (response.error) {
        return;
    }

    const japanese = response.parse.sections.find(function(section) {
        return section.line === "Japanese";
    });
    if (!japanese || japanese.index == null) {
        return;
    }

    // API docs: https://en.wikipedia.org/w/api.php?action=help&modules=parse
    const sectionQuery = {
        action: "parse",
        format: "json",
        page: wikidataKanji,
        prop: "text",
        section: japanese.index,
        origin: "*"
    };
    $.ajax({
        url: "https://en.wiktionary.org/w/api.php",
        data: sectionQuery,
        success: parseWiktionary
    });
}
/**
 * Success handler for the section-text request issued by
 * `findJapaneseSection`.
 *
 * Extracts a reading from the rendered Japanese section, preferring the
 * headword line and falling back to the first `<ruby>` inside a `.Jpan`
 * element. A headword whose base text differs from the displayed title is
 * rejected. When a reading is found it is displayed and `#kanjiInfo` is
 * tagged as sourced from Wiktionary.
 *
 * Replies without rendered text (for example an API error payload) are
 * ignored, matching the error handling of `findJapaneseSection`.
 *
 * @param {Object} response Parsed JSON reply of the Wiktionary API.
 */
function parseWiktionary(response) {
    var parseResult = response && !response.error && response.parse;
    var html = parseResult && parseResult.text && parseResult.text["*"];
    if (typeof html !== "string") {
        return;
    }

    var parsed = $($.parseHTML(html));
    // Wiktionary adds readings as furigana
    var headword = parsed.find(".headword:lang(ja)").first();
    var seeTable = parsed.find(".Jpan ruby").first();
    var kanji = "";
    var kana = "";

    if (headword.length) {
        // Wiktionary already binds their kana, so we have to undo the process to get
        // the constituent parts, at least with the current markup
        var childNodes = headword[0].childNodes;
        for (let i = 0; i < childNodes.length; i++) {
            if (childNodes[i].nodeName == "RUBY") {
                var ruby = $(childNodes[i]); // convert back to JQuery for convenience
                ruby.children("rp").remove();
                // Detach the annotation first so that what remains is the base text
                kana += ruby.children("rt").detach().text();
                kanji += ruby.text();
            } else if (childNodes[i].nodeType == 3) { // "#text"
                // Plain text belongs to both the spelling and the reading
                kanji += childNodes[i].nodeValue;
                kana += childNodes[i].nodeValue;
            }
        }

        // Make sure the headword is actually the term we are showing
        if (kanji != wikidataKanji) { return; }
    } else if (seeTable.length) {
        kanji = seeTable.children("rb").text();
        kana = seeTable.children("rt").text();
    } else {
        return;
    }

    if (kana) {
        displayKana(kana);
        $("#kanjiInfo").addClass("kanjiInfo-wiktionary");
    }
}
/**
 * Adds the reading as an `<rt>` annotation to the `<ruby>` element inside
 * `#kanjiInfo`, then loads bindKana.js unless the displayed title consists
 * of kanji only.
 *
 * The reading is inserted as text, never as markup: it originates from
 * Wikidata, Japanese Wikipedia or Wiktionary, all of which anyone can edit,
 * so it must not be interpreted as HTML.
 *
 * @param {string} kana The reading to display.
 */
function displayKana(kana) {
    $("#kanjiInfo ruby").append($("<rt>").text(kana));

    // Cleanup redundant furigana with another script
    var kanjiOnlyRe = /^[\u3400-\u4DB5\u4E00-\u9FCB\uF900-\uFA6A]+$/;
    if (!kanjiOnlyRe.test(wikidataKanji)) {
        mw.loader.load( '//en.wikipedia.org/w/index.php?title=User:Opencooper/bindKana.js&action=raw&ctype=text/javascript' );
    }
}
// Shared state used by the functions above.

// Wikidata item id of the current page as reported by MediaWiki; setup()
// stops early when it is null.
var wikidataId = mw.config.get( 'wgWikibaseItemId' );
// The Japanese title currently shown; assigned by displayKanji().
var wikidataKanji;
// Title-specific regular expressions; filled in by buildRegexes().
var kanjiRegexes = {};
// Hand setup to jQuery so it runs once the document is ready.
$(setup);
// You must be logged in to post a comment.