Compare commits
No commits in common. "refactor/refactor-everything" and "main" have entirely different histories.
refactor/r
...
main
@ -31,7 +31,7 @@ publishing {
|
|||||||
publications {
|
publications {
|
||||||
create<MavenPublication>("maven") {
|
create<MavenPublication>("maven") {
|
||||||
groupId = project.group as String
|
groupId = project.group as String
|
||||||
artifactId = "chinese-phonetics"
|
artifactId = "chinese-transliteration"
|
||||||
version = project.version as String
|
version = project.version as String
|
||||||
|
|
||||||
from(components["java"])
|
from(components["java"])
|
||||||
|
@ -1 +1 @@
|
|||||||
rootProject.name = "chinese-phonetics"
|
rootProject.name = "chinese-transliteration"
|
@ -1,76 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics
|
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.PinyinMarkSyllableFormatter
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.PinyinNumberSyllableFormatter
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.SyllableFormatter
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.ZhuyinSyllableFormatter
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.parsing.PinyinNumberSyllableParser
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.parsing.SyllableParser
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.parsing.ZhuyinSyllableParser
|
|
||||||
|
|
||||||
/**
 * Public facade for tone extraction and for converting whitespace-separated syllable
 * sequences between Zhuyin, Pinyin with tone numbers and Pinyin with tone marks.
 *
 * All sequence functions trim the input, split it on runs of whitespace and join the
 * converted syllables with a single space.
 */
object ChinesePhonetics {
    /** Syllable delimiter; compiled once instead of on every call. */
    private val whitespaceRegex = Regex("\\s+")

    /** Returns the tone of a single numbered-Pinyin syllable, or `null` if it cannot be parsed. */
    fun getToneFromNumberedPinyin(numberedPinyin: String): Tone? =
        PinyinNumberSyllableParser.parseOrNull(numberedPinyin)?.tone

    /** Returns the tone of a single Zhuyin syllable, or `null` if it cannot be parsed. */
    fun getToneFromZhuyin(zhuyin: String): Tone? =
        ZhuyinSyllableParser.parseOrNull(zhuyin)?.tone

    /** Returns one tone per syllable of the sequence; unparseable syllables yield `null` entries. */
    fun getTonesFromNumberedPinyinSequence(numberedPinyinSequence: String): List<Tone?> =
        splitSyllables(numberedPinyinSequence).map { getToneFromNumberedPinyin(it) }

    /** Returns one tone per syllable of the sequence; unparseable syllables yield `null` entries. */
    fun getTonesFromZhuyinSequence(zhuyinSequence: String): List<Tone?> =
        splitSyllables(zhuyinSequence).map { getToneFromZhuyin(it) }

    /**
     * Converts a Zhuyin sequence to Pinyin with tone numbers.
     *
     * @param strict when `true` an unparseable syllable makes the parser throw; when `false`
     * it is copied to the output unchanged.
     */
    fun zhuyinToPinyinWithNumbers(zhuyin: String, strict: Boolean = true): String = convertSyllableSequence(
        input = zhuyin,
        parser = ZhuyinSyllableParser,
        formatter = PinyinNumberSyllableFormatter,
        strict = strict,
    )

    /** Converts a Zhuyin sequence to Pinyin with tone marks; see [zhuyinToPinyinWithNumbers] for [strict]. */
    fun zhuyinToPinyinWithToneMarks(zhuyin: String, strict: Boolean = true): String = convertSyllableSequence(
        input = zhuyin,
        parser = ZhuyinSyllableParser,
        formatter = PinyinMarkSyllableFormatter,
        strict = strict,
    )

    /** Converts numbered Pinyin to Zhuyin; see [zhuyinToPinyinWithNumbers] for [strict]. */
    fun pinyinWithNumbersToZhuyin(pinyinWithNumbers: String, strict: Boolean = true): String =
        convertSyllableSequence(
            input = pinyinWithNumbers,
            parser = PinyinNumberSyllableParser,
            formatter = ZhuyinSyllableFormatter,
            strict = strict,
        )

    /** Converts numbered Pinyin to Pinyin with tone marks; see [zhuyinToPinyinWithNumbers] for [strict]. */
    fun pinyinWithNumbersToToneMarks(pinyinWithNumbers: String, strict: Boolean = true): String =
        convertSyllableSequence(
            input = pinyinWithNumbers,
            parser = PinyinNumberSyllableParser,
            formatter = PinyinMarkSyllableFormatter,
            strict = strict,
        )

    /** Trims [input] and splits it into its non-empty whitespace-delimited parts. */
    private fun splitSyllables(input: String): List<String> =
        input.trim().split(whitespaceRegex).filter { it.isNotEmpty() }

    /**
     * Parses every syllable of [input] with [parser] and renders it with [formatter].
     * An input without syllables produces the empty string.
     */
    private fun convertSyllableSequence(
        input: String,
        parser: SyllableParser,
        formatter: SyllableFormatter,
        strict: Boolean,
    ): String = splitSyllables(input).joinToString(" ") { part ->
        if (strict) {
            formatter.format(parser.parse(part))
        } else {
            // Lenient mode: keep whatever could not be parsed as-is.
            parser.parseOrNull(part)?.let { formatter.format(it) } ?: part
        }
    }
}
|
|
@ -1,38 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics
|
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.tone.formatting.PinyinNumberToneFormatter
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.tone.formatting.PinyinToneFormatter
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.tone.formatting.ZhuyinToneFormatter
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.tone.parsing.DigitToneParser
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.tone.parsing.IntToneParser
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.tone.parsing.PinyinToneParser
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.tone.parsing.ZhuyinToneParser
|
|
||||||
|
|
||||||
/**
 * The five tone categories used throughout the library.
 *
 * The companion offers two factory families: the plain `from…` functions delegate to the
 * parser's `parse` and the `from…OrNull` functions delegate to its `parseOrNull`.
 */
enum class Tone {
    FIRST,
    SECOND,
    THIRD,
    FOURTH,
    FIFTH;

    companion object {
        /** Tone for a number, or `null` when [IntToneParser] does not recognise it. */
        fun fromIntOrNull(toneNumber: Int): Tone? = IntToneParser.parseOrNull(toneNumber)

        /** Tone for a digit character, or `null` when [DigitToneParser] does not recognise it. */
        fun fromDigitOrNull(digit: Char): Tone? = DigitToneParser.parseOrNull(digit)

        /** Tone for a Pinyin tone diacritic, or `null` when [PinyinToneParser] does not recognise it. */
        fun fromPinyinToneOrNull(pinyinTone: Char): Tone? = PinyinToneParser.parseOrNull(pinyinTone)

        /** Tone for a Zhuyin tone mark, or `null` when [ZhuyinToneParser] does not recognise it. */
        fun fromZhuyinToneOrNull(zhuyinTone: Char): Tone? = ZhuyinToneParser.parseOrNull(zhuyinTone)

        /** Like [fromIntOrNull] but delegates to the throwing `parse`. */
        fun fromInt(toneNumber: Int): Tone = IntToneParser.parse(toneNumber)

        /** Like [fromDigitOrNull] but delegates to the throwing `parse`. */
        fun fromDigit(digit: Char): Tone = DigitToneParser.parse(digit)

        /** Like [fromPinyinToneOrNull] but delegates to the throwing `parse`. */
        fun fromPinyinTone(pinyinTone: Char): Tone = PinyinToneParser.parse(pinyinTone)

        /** Like [fromZhuyinToneOrNull] but delegates to the throwing `parse`. */
        fun fromZhuyinTone(zhuyinTone: Char): Tone = ZhuyinToneParser.parse(zhuyinTone)
    }
}
|
|
||||||
|
|
||||||
/** Maps each tone to its conventional number, 1 for [Tone.FIRST] through 5 for [Tone.FIFTH]. */
@Suppress("MagicNumber")
fun Tone.toInt(): Int {
    return when (this) {
        Tone.FIRST -> 1
        Tone.SECOND -> 2
        Tone.THIRD -> 3
        Tone.FOURTH -> 4
        Tone.FIFTH -> 5
    }
}
|
|
||||||
|
|
||||||
/** Renders this tone through [PinyinNumberToneFormatter]. */
fun Tone.toPinyinNumber() = PinyinNumberToneFormatter.format(this)
|
|
||||||
/** Renders this tone through [PinyinToneFormatter]. */
fun Tone.toPinyinTone() = PinyinToneFormatter.format(this)
|
|
||||||
/** Renders this tone through [ZhuyinToneFormatter]. */
fun Tone.toZhuyinTone() = ZhuyinToneFormatter.format(this)
|
|
@ -1,5 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics
|
|
||||||
|
|
||||||
/** Constants related to Zhuyin text. */
object Zhuyin {
    /** Separator string for Zhuyin text. */
    const val SEPARATOR: String = " "
}
|
|
@ -1,3 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics.exceptions
|
|
||||||
|
|
||||||
/** An [IllegalArgumentException] signalling that a string could not be parsed as a syllable. */
class InvalidSyllableInputException(message: String) : IllegalArgumentException(message)
|
|
@ -1,3 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics.exceptions
|
|
||||||
|
|
||||||
/** An [IllegalArgumentException] signalling that a value could not be parsed as a tone. */
class InvalidToneInputException(message: String) : IllegalArgumentException(message)
|
|
@ -1,8 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics.internal.syllable
|
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.Tone
|
|
||||||
|
|
||||||
/**
 * Internal, notation-independent representation of one syllable.
 *
 * @property basePinyin the toneless Pinyin spelling of the syllable.
 * @property tone the tone carried by the syllable.
 */
internal data class Syllable(
    val basePinyin: String,
    val tone: Tone,
)
|
|
@ -1,36 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics.internal.syllable
|
|
||||||
|
|
||||||
import java.io.IOException
|
|
||||||
|
|
||||||
/**
 * Holds the Pinyin/Zhuyin transcription table for the parsers and formatters.
 *
 * The table is read from a tab-separated classpath resource while this object is being
 * initialised, i.e. the first time any of its members is accessed.
 */
internal object TranscriptionDataRepository {
    private const val TRANSCRIPTION_RESOURCE_PATH = "/pinyin_zhuyin_transcriptions.tsv"
    private const val COLUMN_SEPARATOR = '\t'

    /** Toneless Pinyin syllable -> Zhuyin syllable, as listed in the resource. */
    val pinyinToZhuyin: Map<String, String> = loadTranscriptions()

    /** Inverse of [pinyinToZhuyin]; if several Pinyin keys share a Zhuyin value the last one wins. */
    val zhuyinToPinyin: Map<String, String> =
        pinyinToZhuyin.entries.associate { (pinyin, zhuyin) -> zhuyin to pinyin }

    /** `true` when [zhuyin] (without tone mark) is a key of [zhuyinToPinyin]. */
    fun isValidZhuyin(zhuyin: String) = zhuyin in zhuyinToPinyin

    /** `true` when [pinyin] (without tone) is a key of [pinyinToZhuyin]. */
    fun isValidPinyin(pinyin: String) = pinyin in pinyinToZhuyin

    /** Lowercases [pinyin] and rewrites the ASCII spellings `v` and `u:` to `ü`. */
    fun normalize(pinyin: String) = pinyin.lowercase()
        .replace("v", "ü")
        .replace("u:", "ü")

    /**
     * Reads the transcription resource into a map of first column to second column.
     *
     * Blank lines are skipped; a non-blank line with fewer than two columns is reported with
     * its content instead of surfacing as a bare index error.
     *
     * @throws IllegalStateException if the resource is missing or a line is malformed.
     * @throws IOException if reading the resource fails.
     */
    private fun loadTranscriptions(): Map<String, String> {
        val inputStream = this::class.java.getResourceAsStream(TRANSCRIPTION_RESOURCE_PATH)
            ?: error("Cannot find transcription resource: $TRANSCRIPTION_RESOURCE_PATH")

        return try {
            // useLines closes the reader (and the underlying stream) when the block exits.
            inputStream.bufferedReader().useLines { lines ->
                lines.filter { it.isNotBlank() }
                    .associate { line ->
                        val columns = line.split(COLUMN_SEPARATOR)
                        check(columns.size >= 2) {
                            "Malformed line in $TRANSCRIPTION_RESOURCE_PATH: '$line'"
                        }
                        columns[0] to columns[1]
                    }
            }
        } catch (e: IOException) {
            throw IOException("Failed to load transcription data from $TRANSCRIPTION_RESOURCE_PATH", e)
        }
    }
}
|
|
@ -1,38 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics.internal.syllable.formatting
|
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.Tone
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
|
||||||
import com.marvinelsen.chinese.phonetics.toPinyinTone
|
|
||||||
|
|
||||||
/**
 * Formats a [Syllable] as Pinyin with a tone diacritic.
 *
 * The string returned by `toPinyinTone()` is inserted directly after the chosen vowel;
 * [Tone.FIFTH] syllables are returned without any mark.
 */
internal data object PinyinMarkSyllableFormatter : SyllableFormatter {
    /**
     * Returns [Syllable.basePinyin] with the tone mark inserted after the selected vowel.
     *
     * @throws IllegalStateException if a toned syllable contains none of `a o e i u ü`.
     */
    override fun format(syllable: Syllable): String {
        if (syllable.tone == Tone.FIFTH) return syllable.basePinyin

        val basePinyin = syllable.basePinyin
        val insertAt = findVowelIndexForToneMark(basePinyin) + 1
        val toneMark = syllable.tone.toPinyinTone()

        return StringBuilder(basePinyin).insert(insertAt, toneMark).toString()
    }

    /**
     * Picks the index of the vowel that carries the tone mark.
     *
     * Priority is `a`, then `o`, then `e`; for `i` directly followed by `u` the `u` is chosen,
     * otherwise `i`; then `u`, then `ü`. When a letter occurs more than once its last
     * occurrence is used.
     */
    private fun findVowelIndexForToneMark(pinyin: String): Int = when {
        'a' in pinyin -> pinyin.lastIndexOf('a')
        'o' in pinyin -> pinyin.lastIndexOf('o')
        'e' in pinyin -> pinyin.lastIndexOf('e')
        'i' in pinyin -> {
            val iIndex = pinyin.lastIndexOf('i')
            if (pinyin.getOrNull(iIndex + 1) == 'u') pinyin.lastIndexOf('u') else iIndex
        }
        'u' in pinyin -> pinyin.lastIndexOf('u')
        'ü' in pinyin -> pinyin.lastIndexOf('ü')
        else -> error("No vowel found in Pinyin syllable '$pinyin'")
    }
}
|
|
@ -1,8 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics.internal.syllable.formatting
|
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
|
||||||
import com.marvinelsen.chinese.phonetics.toPinyinNumber
|
|
||||||
|
|
||||||
/** Formats a [Syllable] as its base Pinyin immediately followed by its tone number. */
internal data object PinyinNumberSyllableFormatter : SyllableFormatter {
    override fun format(syllable: Syllable): String {
        val toneNumber = syllable.tone.toPinyinNumber()
        return "${syllable.basePinyin}$toneNumber"
    }
}
|
|
@ -1,7 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics.internal.syllable.formatting
|
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
|
||||||
|
|
||||||
/** Strategy that renders a [Syllable] in one concrete notation. */
internal sealed interface SyllableFormatter {
    /** Returns the textual form of [syllable] in this formatter's notation. */
    fun format(syllable: Syllable): String
}
|
|
@ -1,18 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics.internal.syllable.formatting
|
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.Tone
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.TranscriptionDataRepository
|
|
||||||
import com.marvinelsen.chinese.phonetics.toZhuyinTone
|
|
||||||
|
|
||||||
/**
 * Formats a [Syllable] as Zhuyin.
 *
 * The tone mark is placed in front of the syllable for [Tone.FIFTH] and after it for every
 * other tone.
 */
internal data object ZhuyinSyllableFormatter : SyllableFormatter {
    /**
     * @throws IllegalStateException if [Syllable.basePinyin] has no entry in the transcription
     * table (previously this surfaced as a message-less `NullPointerException`).
     */
    override fun format(syllable: Syllable): String {
        val zhuyinBase = checkNotNull(TranscriptionDataRepository.pinyinToZhuyin[syllable.basePinyin]) {
            "'${syllable.basePinyin}' is not a valid Pinyin syllable"
        }
        val zhuyinToneMark = syllable.tone.toZhuyinTone()

        return when (syllable.tone) {
            Tone.FIFTH -> zhuyinToneMark + zhuyinBase
            Tone.FIRST, Tone.SECOND, Tone.THIRD, Tone.FOURTH -> zhuyinBase + zhuyinToneMark
        }
    }
}
|
|
@ -1,25 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics.internal.syllable.parsing
|
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.Tone
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.TranscriptionDataRepository
|
|
||||||
|
|
||||||
/**
 * Parses a single Pinyin syllable whose last character is a tone digit, e.g. `sheng1`.
 *
 * Everything before the digit is normalised with [TranscriptionDataRepository.normalize]
 * and must then be a known Pinyin syllable.
 */
internal data object PinyinNumberSyllableParser : SyllableParser {
    @Suppress("ReturnCount", "MagicNumber")
    override fun parseOrNull(input: String): Syllable? {
        // Empty, blank and non-digit-terminated inputs all end here.
        val toneDigit = input.lastOrNull()?.takeIf { it.isDigit() } ?: return null
        val parsedTone = Tone.fromDigitOrNull(toneDigit) ?: return null

        val candidate = TranscriptionDataRepository.normalize(input.dropLast(1))

        return if (TranscriptionDataRepository.isValidPinyin(candidate)) {
            Syllable(basePinyin = candidate, tone = parsedTone)
        } else {
            null
        }
    }
}
|
|
@ -1,10 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics.internal.syllable.parsing
|
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.exceptions.InvalidSyllableInputException
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
|
||||||
|
|
||||||
/** Strategy that reads a [Syllable] from one concrete notation. */
internal sealed interface SyllableParser {
    /** Returns the parsed syllable, or `null` when [input] is not valid in this notation. */
    fun parseOrNull(input: String): Syllable?

    /**
     * Strict variant of [parseOrNull].
     *
     * @throws InvalidSyllableInputException when [parseOrNull] yields `null`.
     */
    fun parse(input: String): Syllable {
        val syllable = parseOrNull(input)
        if (syllable == null) {
            throw InvalidSyllableInputException("Invalid input for syllable parsing: '$input'")
        }
        return syllable
    }
}
|
|
@ -1,20 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics.internal.syllable.parsing
|
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.Tone
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.TranscriptionDataRepository
|
|
||||||
|
|
||||||
/**
 * Parses a single Zhuyin syllable with an optional tone mark.
 *
 * All tone-mark characters are stripped before the table lookup. The tone is taken from the
 * last character, then from the first character, and defaults to [Tone.FIRST] when neither
 * is a tone mark.
 */
internal data object ZhuyinSyllableParser : SyllableParser {
    private val zhuyinToneMarkRegex = """[ˊˇˋ˙¯]""".toRegex()

    override fun parseOrNull(input: String): Syllable? {
        val zhuyinWithoutToneMark = input.replace(zhuyinToneMarkRegex, "")

        // Single lookup: a missing key means the syllable is not valid Zhuyin.
        val basePinyin = TranscriptionDataRepository.zhuyinToPinyin[zhuyinWithoutToneMark] ?: return null

        // NOTE(review): marks are accepted at any position and in any number; only the last and
        // first characters decide the tone. Confirm whether stricter validation is wanted.
        val tone = input.lastOrNull()?.let { Tone.fromZhuyinToneOrNull(it) }
            ?: input.firstOrNull()?.let { Tone.fromZhuyinToneOrNull(it) }
            ?: Tone.FIRST

        return Syllable(basePinyin = basePinyin, tone = tone)
    }
}
|
|
@ -1,13 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics.internal.tone.formatting
|
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.Tone
|
|
||||||
|
|
||||||
/** Renders a [Tone] as the digit string used in numbered Pinyin (`"1"` to `"5"`). */
internal data object PinyinNumberToneFormatter : ToneFormatter {
    override fun format(tone: Tone): String {
        return when (tone) {
            Tone.FIRST -> "1"
            Tone.SECOND -> "2"
            Tone.THIRD -> "3"
            Tone.FOURTH -> "4"
            Tone.FIFTH -> "5"
        }
    }
}
|
|
@ -1,13 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics.internal.tone.formatting
|
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.Tone
|
|
||||||
|
|
||||||
/**
 * Renders a [Tone] as the one-character diacritic string used for tone-marked Pinyin;
 * [Tone.FIFTH] yields the empty string.
 */
internal data object PinyinToneFormatter : ToneFormatter {
    override fun format(tone: Tone): String {
        return when (tone) {
            Tone.FIRST -> "\u0304"
            Tone.SECOND -> "\u0301"
            Tone.THIRD -> "\u030C"
            Tone.FOURTH -> "\u0300"
            Tone.FIFTH -> ""
        }
    }
}
|
|
@ -1,7 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics.internal.tone.formatting
|
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.Tone
|
|
||||||
|
|
||||||
/** Strategy that renders a [Tone] in one concrete notation. */
internal sealed interface ToneFormatter {
    /** Returns the textual form of [tone] in this formatter's notation. */
    fun format(tone: Tone): String
}
|
|
@ -1,13 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics.internal.tone.formatting
|
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.Tone
|
|
||||||
|
|
||||||
/** Renders a [Tone] as its Zhuyin tone mark; [Tone.FIRST] yields the empty string. */
internal data object ZhuyinToneFormatter : ToneFormatter {
    override fun format(tone: Tone): String {
        return when (tone) {
            Tone.FIRST -> ""
            Tone.SECOND -> "ˊ"
            Tone.THIRD -> "ˇ"
            Tone.FOURTH -> "ˋ"
            Tone.FIFTH -> "˙"
        }
    }
}
|
|
@ -1,14 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics.internal.tone.parsing
|
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.Tone
|
|
||||||
|
|
||||||
/** Reads a [Tone] from one of the digit characters `'1'` to `'5'`. */
internal data object DigitToneParser : ToneParser<Char> {
    private val tonesByDigit: Map<Char, Tone> = mapOf(
        '1' to Tone.FIRST,
        '2' to Tone.SECOND,
        '3' to Tone.THIRD,
        '4' to Tone.FOURTH,
        '5' to Tone.FIFTH,
    )

    /** Returns the tone for [input], or `null` for any other character. */
    override fun parseOrNull(input: Char): Tone? = tonesByDigit[input]
}
|
|
@ -1,15 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics.internal.tone.parsing
|
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.Tone
|
|
||||||
|
|
||||||
/** Reads a [Tone] from one of the integers 1 to 5. */
@Suppress("MagicNumber")
internal data object IntToneParser : ToneParser<Int> {
    private val tonesByNumber: Map<Int, Tone> = mapOf(
        1 to Tone.FIRST,
        2 to Tone.SECOND,
        3 to Tone.THIRD,
        4 to Tone.FOURTH,
        5 to Tone.FIFTH,
    )

    /** Returns the tone for [input], or `null` for any other number. */
    override fun parseOrNull(input: Int): Tone? = tonesByNumber[input]
}
|
|
@ -1,13 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics.internal.tone.parsing
|
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.Tone
|
|
||||||
|
|
||||||
/**
 * Reads a [Tone] from a Pinyin tone diacritic character.
 *
 * Only the four marks listed below are recognised; no character maps to [Tone.FIFTH].
 */
internal data object PinyinToneParser : ToneParser<Char> {
    private val tonesByDiacritic: Map<Char, Tone> = mapOf(
        '\u0304' to Tone.FIRST,
        '\u0301' to Tone.SECOND,
        '\u030C' to Tone.THIRD,
        '\u0300' to Tone.FOURTH,
    )

    /** Returns the tone for [input], or `null` for any other character. */
    override fun parseOrNull(input: Char): Tone? = tonesByDiacritic[input]
}
|
|
@ -1,10 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics.internal.tone.parsing
|
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.Tone
|
|
||||||
import com.marvinelsen.chinese.phonetics.exceptions.InvalidToneInputException
|
|
||||||
|
|
||||||
/** Strategy that reads a [Tone] from a value of type [T]. */
internal sealed interface ToneParser<T> {
    /** Returns the parsed tone, or `null` when [input] does not denote a tone. */
    fun parseOrNull(input: T): Tone?

    /**
     * Strict variant of [parseOrNull].
     *
     * @throws InvalidToneInputException when [parseOrNull] yields `null`.
     */
    fun parse(input: T): Tone {
        val tone = parseOrNull(input)
        if (tone == null) {
            throw InvalidToneInputException("Invalid input for tone parsing: '$input'")
        }
        return tone
    }
}
|
|
@ -1,14 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics.internal.tone.parsing
|
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.Tone
|
|
||||||
|
|
||||||
/** Reads a [Tone] from a Zhuyin tone mark character. */
internal data object ZhuyinToneParser : ToneParser<Char> {
    private val tonesByMark: Map<Char, Tone> = mapOf(
        '¯' to Tone.FIRST,
        'ˊ' to Tone.SECOND,
        'ˇ' to Tone.THIRD,
        'ˋ' to Tone.FOURTH,
        '˙' to Tone.FIFTH,
    )

    /** Returns the tone for [input], or `null` for any other character. */
    override fun parseOrNull(input: Char): Tone? = tonesByMark[input]
}
|
|
@ -0,0 +1,119 @@
|
|||||||
|
package com.marvinelsen.chinese.transliteration.api
|
||||||
|
|
||||||
|
import java.io.InputStream
|
||||||
|
|
||||||
|
/**
 * A single syllable stored as a toneless Pinyin spelling plus its [Tone].
 *
 * The spelling is kept exactly as supplied (letter case and the `v` / `u:` spellings of `ü`
 * are preserved); validity is always checked against the lowercase form in the bundled
 * Pinyin/Zhuyin table.
 *
 * @property pinyinSyllableWithoutTone the Pinyin spelling without any tone information.
 * @property tone the tone of the syllable.
 */
@Suppress("MagicNumber", "MaximumLineLength", "MaxLineLength")
data class PinyinSyllable(
    val pinyinSyllableWithoutTone: String,
    val tone: Tone,
) {
    /**
     * Renders this syllable in the given [transliterationSystem].
     *
     * @throws IllegalStateException if [pinyinSyllableWithoutTone] is not a known Pinyin
     * syllable, or if a tone mark is required but the syllable has no letter to carry it.
     */
    fun format(transliterationSystem: TransliterationSystem): String = when (transliterationSystem) {
        TransliterationSystem.ZHUYIN -> formatToZhuyin()
        TransliterationSystem.PINYIN_WITH_TONE_NUMBERS -> formatToPinyinWithToneNumbers()
        TransliterationSystem.PINYIN_WITH_TONE_MARKS -> formatToPinyinWithToneMarks()
    }

    /** Zhuyin form: the tone mark precedes the syllable for [Tone.FIFTH] and follows it otherwise. */
    private fun formatToZhuyin(): String {
        val zhuyinSyllable = pinyinToZhuyin[pinyinSyllableWithoutTone.lowercase()]
            ?: error("$pinyinSyllableWithoutTone is not a valid Pinyin syllable")
        val zhuyinToneMark = tone.format(TransliterationSystem.ZHUYIN)

        return when (tone) {
            Tone.FIRST, Tone.SECOND, Tone.THIRD, Tone.FORTH -> zhuyinSyllable + zhuyinToneMark
            Tone.FIFTH -> zhuyinToneMark + zhuyinSyllable
        }
    }

    /** Numbered form: the stored spelling followed by the tone digit. */
    private fun formatToPinyinWithToneNumbers(): String {
        checkIsKnownPinyin()

        return pinyinSyllableWithoutTone + tone.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)
    }

    /**
     * Tone-mark form: `v` / `u:` are rewritten to `ü` (case preserved) and the tone diacritic
     * is inserted right after the letter that carries it.
     */
    private fun formatToPinyinWithToneMarks(): String {
        checkIsKnownPinyin()

        val sanitizedPinyin = pinyinSyllableWithoutTone
            .replace("v", "ü").replace("u:", "ü")
            // Uppercase input is accepted by the validity check, so it must be sanitised too.
            .replace("V", "Ü").replace("U:", "Ü")

        val toneMark = tone.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS)
        // Nothing to insert (fifth tone): return early so vowel-less syllables such as "r" or
        // "m" do not fail in the vowel search below.
        if (toneMark.isEmpty()) return sanitizedPinyin

        // Search on the lowercase form so that the rules below are case-insensitive; indices
        // are then applied to the original-case string.
        val lowercasePinyin = sanitizedPinyin.lowercase()
        val markedLetterIndex = when {
            'a' in lowercasePinyin -> lowercasePinyin.lastIndexOf('a')
            'o' in lowercasePinyin -> lowercasePinyin.lastIndexOf('o')
            'e' in lowercasePinyin -> lowercasePinyin.lastIndexOf('e')
            'i' in lowercasePinyin -> {
                val iIndex = lowercasePinyin.lastIndexOf('i')
                // In "iu" the mark goes on the "u".
                if (lowercasePinyin.getOrNull(iIndex + 1) == 'u') lowercasePinyin.lastIndexOf('u') else iIndex
            }
            'u' in lowercasePinyin -> lowercasePinyin.lastIndexOf('u')
            'ü' in lowercasePinyin -> lowercasePinyin.lastIndexOf('ü')
            // Syllabic nasals (m, n, ng, ...) carry the mark on the nasal letter.
            'm' in lowercasePinyin -> lowercasePinyin.indexOf('m')
            'n' in lowercasePinyin -> lowercasePinyin.indexOf('n')
            else -> error("No vowel found in Pinyin syllable '$sanitizedPinyin'")
        }

        return StringBuilder(sanitizedPinyin).insert(markedLetterIndex + 1, toneMark).toString()
    }

    /** Fails with [IllegalStateException] when the stored spelling is not in the table. */
    private fun checkIsKnownPinyin() {
        check(pinyinSyllableWithoutTone.lowercase() in pinyinToZhuyin) {
            "'$pinyinSyllableWithoutTone' is not a valid Pinyin syllable."
        }
    }

    companion object {
        private const val TRANSCRIPTION_RESOURCE_PATH = "/pinyin_zhuyin_transcriptions.tsv"

        private val pinyinToZhuyin: Map<String, String> = parseTranscriptions(
            this::class.java.getResourceAsStream(TRANSCRIPTION_RESOURCE_PATH)
                ?: error("Cannot find transcription resource: $TRANSCRIPTION_RESOURCE_PATH")
        )

        // Several Pinyin spellings can share one Zhuyin value; associate keeps the last one
        // listed in the resource.
        private val zhuyinToPinyin: Map<String, String> =
            pinyinToZhuyin.entries.associate { (pinyin, zhuyin) -> zhuyin to pinyin }

        private val zhuyinToneMarkRegex = """[ˊˇˋ˙]""".toRegex()

        /**
         * Returns `true` when [pinyinSyllable] ends in a tone digit `1`..`5` and the rest is,
         * ignoring case, a known Pinyin syllable. Empty input is simply invalid.
         */
        fun isValidPinyinWithToneNumberSyllable(pinyinSyllable: String): Boolean {
            val toneDigit = pinyinSyllable.lastOrNull() ?: return false
            return Tone.fromDigitOrNull(toneDigit) != null &&
                pinyinSyllable.dropLast(1).lowercase() in pinyinToZhuyin
        }

        /**
         * Parses a syllable such as `sheng1`.
         *
         * @throws IllegalArgumentException if the input is empty, does not end in a tone digit
         * `1`..`5`, or the part before the digit is not a known Pinyin syllable.
         */
        fun fromPinyinWithToneNumber(pinyinWithToneNumber: String): PinyinSyllable {
            require(pinyinWithToneNumber.isNotEmpty()) {
                "'' is not a valid Pinyin with tone number syllable. Expected a syllable followed by a tone number, but the input was empty"
            }

            val lastCharacter = pinyinWithToneNumber.last()
            val pinyinWithoutNumber = pinyinWithToneNumber.dropLast(1)

            require(lastCharacter.isDigit()) {
                "'$pinyinWithToneNumber' is not a valid Pinyin with tone number syllable. Expected the last character to be a digit, but was '$lastCharacter'"
            }
            // fromDigitOrNull only accepts the ASCII digits '1'..'5', which also rejects
            // non-ASCII digits that isDigit() lets through.
            val parsedTone = requireNotNull(Tone.fromDigitOrNull(lastCharacter)) {
                "'$pinyinWithToneNumber' is not a valid Pinyin with tone number syllable. Expected the tone number 'n' to be in range 1 <= n <= 5, but was '$lastCharacter'"
            }
            require(pinyinWithoutNumber.lowercase() in pinyinToZhuyin) {
                "'$pinyinWithoutNumber' is not a valid Pinyin syllable."
            }

            return PinyinSyllable(
                pinyinSyllableWithoutTone = pinyinWithoutNumber,
                tone = parsedTone,
            )
        }

        /**
         * Parses a Zhuyin syllable. The tone is read from the last character, then from the
         * first character, and defaults to [Tone.FIRST] when neither is a tone mark.
         *
         * @throws IllegalArgumentException if the syllable without tone marks is unknown.
         */
        fun fromZhuyin(zhuyin: String): PinyinSyllable {
            val zhuyinWithoutToneMark = zhuyin.replace(zhuyinToneMarkRegex, "")
            val pinyin = requireNotNull(zhuyinToPinyin[zhuyinWithoutToneMark]) {
                "'$zhuyin' is not a valid Zhuyin syllable."
            }
            val parsedTone = zhuyin.lastOrNull()?.let { Tone.fromZhuyinToneMarkOrNull(it) }
                ?: zhuyin.firstOrNull()?.let { Tone.fromZhuyinToneMarkOrNull(it) }
                ?: Tone.FIRST

            return PinyinSyllable(pinyin, parsedTone)
        }

        /**
         * Reads tab-separated `pinyin<TAB>zhuyin` rows. Blank lines are skipped and rows with
         * fewer than two columns fail with a message naming the offending line.
         */
        private fun parseTranscriptions(inputStream: InputStream): Map<String, String> =
            inputStream.bufferedReader().useLines { lines ->
                lines.filter { it.isNotBlank() }
                    .associate { line ->
                        val columns = line.split('\t')
                        check(columns.size >= 2) { "Malformed transcription line: '$line'" }
                        columns[0] to columns[1]
                    }
            }
    }
}
|
@ -0,0 +1,81 @@
|
|||||||
|
package com.marvinelsen.chinese.transliteration.api
|
||||||
|
|
||||||
|
/**
 * The five tone categories, convertible from and to numbers, digit characters and Zhuyin
 * tone marks, and printable in every [TransliterationSystem].
 *
 * NOTE(review): `FORTH` looks like a misspelling of "FOURTH"; it is kept because the entry
 * name is public API.
 */
@Suppress("MagicNumber")
enum class Tone {
    FIRST,
    SECOND,
    THIRD,
    FORTH,
    FIFTH;

    /** Returns the tone number, 1 for [FIRST] through 5 for [FIFTH]. */
    fun toInt(): Int {
        return when (this) {
            FIRST -> 1
            SECOND -> 2
            THIRD -> 3
            FORTH -> 4
            FIFTH -> 5
        }
    }

    /** Returns the textual tone indicator used by [transliterationSystem]. */
    fun format(transliterationSystem: TransliterationSystem): String {
        return when (transliterationSystem) {
            TransliterationSystem.ZHUYIN -> formatToZhuyin()
            TransliterationSystem.PINYIN_WITH_TONE_NUMBERS -> formatToPinyinWithToneNumbers()
            TransliterationSystem.PINYIN_WITH_TONE_MARKS -> formatToPinyinWithToneMarks()
        }
    }

    /** The tone number as a one-digit string. */
    private fun formatToPinyinWithToneNumbers(): String = toInt().toString()

    /** A one-character diacritic string; empty for [FIFTH]. */
    private fun formatToPinyinWithToneMarks(): String {
        return when (this) {
            FIRST -> "\u0304"
            SECOND -> "\u0301"
            THIRD -> "\u030C"
            FORTH -> "\u0300"
            FIFTH -> ""
        }
    }

    /** The Zhuyin tone mark; empty for [FIRST]. */
    private fun formatToZhuyin(): String {
        return when (this) {
            FIRST -> ""
            SECOND -> "ˊ"
            THIRD -> "ˇ"
            FORTH -> "ˋ"
            FIFTH -> "˙"
        }
    }

    companion object {
        /** Tone for a number 1..5, or `null` for any other number. */
        fun fromIntOrNull(number: Int): Tone? {
            return when (number) {
                1 -> FIRST
                2 -> SECOND
                3 -> THIRD
                4 -> FORTH
                5 -> FIFTH
                else -> null
            }
        }

        /** @throws IllegalArgumentException if [number] is not in 1..5. */
        fun fromInt(number: Int): Tone =
            requireNotNull(fromIntOrNull(number)) { "Number $number is not a valid tone" }

        /** Tone for a digit `'1'`..`'5'`, or `null` for any other character. */
        fun fromDigitOrNull(digit: Char): Tone? {
            return when (digit) {
                '1' -> FIRST
                '2' -> SECOND
                '3' -> THIRD
                '4' -> FORTH
                '5' -> FIFTH
                else -> null
            }
        }

        /** @throws IllegalArgumentException if [digit] is not one of `'1'`..`'5'`. */
        fun fromDigit(digit: Char): Tone =
            requireNotNull(fromDigitOrNull(digit)) { "Digit $digit is not a valid tone" }

        /**
         * Tone for a Zhuyin tone mark, or `null` for any other character. No character maps
         * to [FIRST].
         */
        fun fromZhuyinToneMarkOrNull(zhuyinToneMark: Char): Tone? {
            return when (zhuyinToneMark) {
                'ˊ' -> SECOND
                'ˇ' -> THIRD
                'ˋ' -> FORTH
                '˙' -> FIFTH
                else -> null
            }
        }

        /** @throws IllegalArgumentException if [zhuyinToneMark] is not a recognised tone mark. */
        fun fromZhuyinToneMark(zhuyinToneMark: Char): Tone =
            requireNotNull(fromZhuyinToneMarkOrNull(zhuyinToneMark)) {
                "Invalid zhuyin tone mark '$zhuyinToneMark'"
            }
    }
}
|
@ -0,0 +1,5 @@
|
|||||||
|
package com.marvinelsen.chinese.transliteration.api
|
||||||
|
|
||||||
|
/** The notations a syllable or tone can be rendered in. */
enum class TransliterationSystem {
    ZHUYIN,
    PINYIN_WITH_TONE_NUMBERS,
    PINYIN_WITH_TONE_MARKS,
}
|
@ -0,0 +1,5 @@
|
|||||||
|
package com.marvinelsen.chinese.transliteration.api
|
||||||
|
|
||||||
|
/** Constants related to Zhuyin text. */
object Zhuyin {
    /** Separator string for Zhuyin text. */
    const val SEPARATOR: String = " "
}
|
@ -195,9 +195,17 @@ lu ㄌㄨ
|
|||||||
luan ㄌㄨㄢ
|
luan ㄌㄨㄢ
|
||||||
lun ㄌㄨㄣ
|
lun ㄌㄨㄣ
|
||||||
luo ㄌㄨㄛ
|
luo ㄌㄨㄛ
|
||||||
|
lu: ㄌㄩ
|
||||||
|
lv ㄌㄩ
|
||||||
lü ㄌㄩ
|
lü ㄌㄩ
|
||||||
|
lu:e ㄌㄩㄝ
|
||||||
|
lve ㄌㄩㄝ
|
||||||
lüe ㄌㄩㄝ
|
lüe ㄌㄩㄝ
|
||||||
|
lu:n ㄌㄩㄣ
|
||||||
|
lvn ㄌㄩㄣ
|
||||||
lün ㄌㄩㄣ
|
lün ㄌㄩㄣ
|
||||||
|
lu:an ㄌㄩㄢ
|
||||||
|
lvan ㄌㄩㄢ
|
||||||
lüan ㄌㄩㄢ
|
lüan ㄌㄩㄢ
|
||||||
m ㄇ
|
m ㄇ
|
||||||
ma ㄇㄚ
|
ma ㄇㄚ
|
||||||
@ -243,7 +251,11 @@ nu ㄋㄨ
|
|||||||
nuan ㄋㄨㄢ
|
nuan ㄋㄨㄢ
|
||||||
nun ㄋㄨㄣ
|
nun ㄋㄨㄣ
|
||||||
nuo ㄋㄨㄛ
|
nuo ㄋㄨㄛ
|
||||||
|
nu: ㄋㄩ
|
||||||
|
nv ㄋㄩ
|
||||||
nü ㄋㄩ
|
nü ㄋㄩ
|
||||||
|
nu:e ㄋㄩㄝ
|
||||||
|
nve ㄋㄩㄝ
|
||||||
nüe ㄋㄩㄝ
|
nüe ㄋㄩㄝ
|
||||||
o ㄛ
|
o ㄛ
|
||||||
ou ㄡ
|
ou ㄡ
|
||||||
|
|
@ -1,113 +0,0 @@
|
|||||||
package com.marvinelsen.chinese.phonetics
|
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.Syllable
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.PinyinMarkSyllableFormatter
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.PinyinNumberSyllableFormatter
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.formatting.ZhuyinSyllableFormatter
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.parsing.PinyinNumberSyllableParser
|
|
||||||
import com.marvinelsen.chinese.phonetics.internal.syllable.parsing.ZhuyinSyllableParser
|
|
||||||
import io.kotest.core.spec.style.ShouldSpec
|
|
||||||
import io.kotest.datatest.withData
|
|
||||||
import io.kotest.matchers.shouldBe
|
|
||||||
|
|
||||||
/**
 * Data-driven tests for the syllable parsers and formatters.
 *
 * Every generated test is named `input -> expected`. The contexts with an empty body are
 * placeholders without cases.
 */
class ChinesePhoneticsTest : ShouldSpec({
    context("from pinyin with tone numbers") {
        withData(
            nameFn = { "'${it.first}' -> ${it.second}" },
            "sheng1" to Syllable("sheng", Tone.FIRST),
            "zhi2" to Syllable("zhi", Tone.SECOND),
            "ka3" to Syllable("ka", Tone.THIRD),
            "yao4" to Syllable("yao", Tone.FOURTH),
            "me5" to Syllable("me", Tone.FIFTH),
            "Me5" to Syllable("me", Tone.FIFTH),
            "nv3" to Syllable("nü", Tone.THIRD),
            "nü3" to Syllable("nü", Tone.THIRD),
            "nu:3" to Syllable("nü", Tone.THIRD),
            "r5" to Syllable("r", Tone.FIFTH),
            "R5" to Syllable("r", Tone.FIFTH),
            "er2" to Syllable("er", Tone.SECOND),
            "Er2" to Syllable("er", Tone.SECOND),
        ) { (pinyinWithNumber, expectedSyllable) ->
            PinyinNumberSyllableParser.parse(pinyinWithNumber) shouldBe expectedSyllable
        }
    }

    context("from zhuyin") {
        withData(
            nameFn = { "'${it.first}' -> ${it.second}" },
            "ㄕㄥ" to Syllable("sheng", Tone.FIRST),
            "ㄓˊ" to Syllable("zhi", Tone.SECOND),
            "ㄎㄚˇ" to Syllable("ka", Tone.THIRD),
            "ㄧㄠˋ" to Syllable("yao", Tone.FOURTH),
            "ㄇㄜ˙" to Syllable("me", Tone.FIFTH),
            "˙ㄇㄜ" to Syllable("me", Tone.FIFTH),
            "ㄋㄩˇ" to Syllable("nü", Tone.THIRD),
        ) { (zhuyin, expectedSyllable) ->
            ZhuyinSyllableParser.parse(zhuyin) shouldBe expectedSyllable
        }
    }

    context("from invalid pinyin with tone numbers") {
    }

    context("from invalid zhuyin") {
    }

    context("format to zhuyin") {
        withData(
            // Name by input and expected output only; "$it" would print the whole pair.
            nameFn = { "${it.first} -> '${it.second}'" },
            Syllable("sheng", Tone.FIRST) to "ㄕㄥ",
            Syllable("zhi", Tone.SECOND) to "ㄓˊ",
            Syllable("ka", Tone.THIRD) to "ㄎㄚˇ",
            Syllable("yao", Tone.FOURTH) to "ㄧㄠˋ",
            Syllable("me", Tone.FIFTH) to "˙ㄇㄜ",
            Syllable("nü", Tone.THIRD) to "ㄋㄩˇ",
            Syllable("r", Tone.FIFTH) to "˙ㄦ",
            Syllable("er", Tone.SECOND) to "ㄦˊ",
        ) { (syllable, expectedZhuyin) ->
            ZhuyinSyllableFormatter.format(syllable) shouldBe expectedZhuyin
        }
    }

    context("format to pinyin with tone numbers") {
        withData(
            nameFn = { "${it.first} -> '${it.second}'" },
            Syllable("sheng", Tone.FIRST) to "sheng1",
            Syllable("zhi", Tone.SECOND) to "zhi2",
            Syllable("ka", Tone.THIRD) to "ka3",
            Syllable("yao", Tone.FOURTH) to "yao4",
            Syllable("me", Tone.FIFTH) to "me5",
            Syllable("nü", Tone.THIRD) to "nü3",
            Syllable("r", Tone.FIFTH) to "r5",
            Syllable("er", Tone.SECOND) to "er2",
        ) { (syllable, expectedPinyinWithToneNumbers) ->
            PinyinNumberSyllableFormatter.format(syllable) shouldBe expectedPinyinWithToneNumbers
        }
    }

    context("format to pinyin with tone marks") {
        withData(
            nameFn = { "${it.first} -> '${it.second}'" },
            Syllable("sheng", Tone.FIRST) to "shēng",
            Syllable("zhi", Tone.SECOND) to "zhí",
            Syllable("ka", Tone.THIRD) to "kǎ",
            Syllable("yao", Tone.FOURTH) to "yào",
            Syllable("me", Tone.FIFTH) to "me",
            Syllable("zhui", Tone.FIRST) to "zhuī",
            Syllable("liu", Tone.FIRST) to "liū",
            Syllable("nü", Tone.THIRD) to "nǚ",
            Syllable("r", Tone.FIFTH) to "r",
            Syllable("er", Tone.SECOND) to "ér",
        ) { (syllable, expectedPinyinWithToneMarks) ->
            PinyinMarkSyllableFormatter.format(syllable) shouldBe expectedPinyinWithToneMarks
        }
    }

    context("format to zhuyin with invalid pinyin syllable") {
    }

    context("format to pinyin with tone diacritics with invalid pinyin syllable") {
    }

    context("format to pinyin with tone numbers with invalid pinyin syllable") {
    }
})
|
|
@ -0,0 +1,119 @@
|
|||||||
|
package com.marvinelsen.chinese.transliteration.api
|
||||||
|
|
||||||
|
import io.kotest.core.spec.style.ShouldSpec
|
||||||
|
import io.kotest.datatest.withData
|
||||||
|
import io.kotest.matchers.shouldBe
|
||||||
|
|
||||||
|
/**
 * Data-driven Kotest spec for [PinyinSyllable].
 *
 * Two directions are exercised:
 *  - parsing: numbered pinyin and zhuyin input is turned into a [PinyinSyllable];
 *  - formatting: a [PinyinSyllable] is rendered as zhuyin, pinyin with tone numbers,
 *    and pinyin with tone marks via `format(TransliterationSystem)`.
 *
 * The `context` blocks dealing with invalid input are present but contain no cases.
 *
 * NOTE(review): `Tone.FORTH` reads like a misspelling of "FOURTH"; the constant is declared
 * outside this file, so it is referenced here exactly as the rest of this spec uses it.
 */
class PinyinSyllableTest : ShouldSpec({
    // Test name for parsing cases: the quoted raw input followed by the expected syllable.
    val quotedInputName: (Pair<String, PinyinSyllable>) -> String = { "'${it.first}' -> ${it.second}" }

    // Test name for formatting cases: the syllable rendered as numbered pinyin, then the expected text.
    val numberedPinyinName: (Pair<PinyinSyllable, String>) -> String =
        { "${it.first.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)} -> ${it.second}" }

    context("from pinyin with tone numbers") {
        // Covers lower/upper-case input and the three spellings of u-umlaut ("nv", "nü", "nu:");
        // the expected syllable keeps the spelling and casing of the input.
        withData(
            nameFn = quotedInputName,
            "sheng1" to PinyinSyllable("sheng", Tone.FIRST),
            "zhi2" to PinyinSyllable("zhi", Tone.SECOND),
            "ka3" to PinyinSyllable("ka", Tone.THIRD),
            "yao4" to PinyinSyllable("yao", Tone.FORTH),
            "me5" to PinyinSyllable("me", Tone.FIFTH),
            "Me5" to PinyinSyllable("Me", Tone.FIFTH),
            "nv3" to PinyinSyllable("nv", Tone.THIRD),
            "nü3" to PinyinSyllable("nü", Tone.THIRD),
            "nu:3" to PinyinSyllable("nu:", Tone.THIRD),
            "r5" to PinyinSyllable("r", Tone.FIFTH),
            "R5" to PinyinSyllable("R", Tone.FIFTH),
            "er2" to PinyinSyllable("er", Tone.SECOND),
            "Er2" to PinyinSyllable("Er", Tone.SECOND),
        ) { (numberedPinyin, expected) ->
            val parsed = PinyinSyllable.fromPinyinWithToneNumber(numberedPinyin)
            parsed shouldBe expected
        }
    }

    context("from zhuyin") {
        // The fifth-tone mark is expected to be accepted both after and before the syllable.
        withData(
            nameFn = quotedInputName,
            "ㄕㄥ" to PinyinSyllable("sheng", Tone.FIRST),
            "ㄓˊ" to PinyinSyllable("zhi", Tone.SECOND),
            "ㄎㄚˇ" to PinyinSyllable("ka", Tone.THIRD),
            "ㄧㄠˋ" to PinyinSyllable("yao", Tone.FORTH),
            "ㄇㄜ˙" to PinyinSyllable("me", Tone.FIFTH),
            "˙ㄇㄜ" to PinyinSyllable("me", Tone.FIFTH),
            "ㄋㄩˇ" to PinyinSyllable("nü", Tone.THIRD),
        ) { (zhuyinInput, expected) ->
            val parsed = PinyinSyllable.fromZhuyin(zhuyinInput)
            parsed shouldBe expected
        }
    }

    context("from invalid pinyin with tone numbers") {
        // No cases yet.
    }

    context("from invalid zhuyin") {
        // No cases yet.
    }

    context("format to zhuyin") {
        // All u-umlaut spellings and both letter cases are expected to yield the same zhuyin;
        // the fifth-tone mark is expected in leading position.
        withData(
            nameFn = numberedPinyinName,
            PinyinSyllable("sheng", Tone.FIRST) to "ㄕㄥ",
            PinyinSyllable("zhi", Tone.SECOND) to "ㄓˊ",
            PinyinSyllable("ka", Tone.THIRD) to "ㄎㄚˇ",
            PinyinSyllable("yao", Tone.FORTH) to "ㄧㄠˋ",
            PinyinSyllable("me", Tone.FIFTH) to "˙ㄇㄜ",
            PinyinSyllable("nü", Tone.THIRD) to "ㄋㄩˇ",
            PinyinSyllable("nu:", Tone.THIRD) to "ㄋㄩˇ",
            PinyinSyllable("nv", Tone.THIRD) to "ㄋㄩˇ",
            PinyinSyllable("r", Tone.FIFTH) to "˙ㄦ",
            PinyinSyllable("R", Tone.FIFTH) to "˙ㄦ",
            PinyinSyllable("er", Tone.SECOND) to "ㄦˊ",
            PinyinSyllable("Er", Tone.SECOND) to "ㄦˊ",
        ) { (input, expected) ->
            val formatted = input.format(TransliterationSystem.ZHUYIN)
            formatted shouldBe expected
        }
    }

    context("format to pinyin with tone numbers") {
        // The original spelling and casing are expected to be kept, with the tone digit appended.
        withData(
            nameFn = numberedPinyinName,
            PinyinSyllable("sheng", Tone.FIRST) to "sheng1",
            PinyinSyllable("zhi", Tone.SECOND) to "zhi2",
            PinyinSyllable("ka", Tone.THIRD) to "ka3",
            PinyinSyllable("yao", Tone.FORTH) to "yao4",
            PinyinSyllable("me", Tone.FIFTH) to "me5",
            PinyinSyllable("nü", Tone.THIRD) to "nü3",
            PinyinSyllable("nu:", Tone.THIRD) to "nu:3",
            PinyinSyllable("nv", Tone.THIRD) to "nv3",
            PinyinSyllable("r", Tone.FIFTH) to "r5",
            PinyinSyllable("R", Tone.FIFTH) to "R5",
            PinyinSyllable("er", Tone.SECOND) to "er2",
            PinyinSyllable("Er", Tone.SECOND) to "Er2",
        ) { (input, expected) ->
            val formatted = input.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)
            formatted shouldBe expected
        }
    }

    context("format to pinyin with tone marks") {
        // Includes "zhui"/"liu" (mark expected on the last vowel) and the u-umlaut spellings,
        // which are all expected to come out as "ǚ" with the tone mark applied.
        withData(
            nameFn = numberedPinyinName,
            PinyinSyllable("sheng", Tone.FIRST) to "shēng",
            PinyinSyllable("zhi", Tone.SECOND) to "zhí",
            PinyinSyllable("ka", Tone.THIRD) to "kǎ",
            PinyinSyllable("yao", Tone.FORTH) to "yào",
            PinyinSyllable("me", Tone.FIFTH) to "me",
            PinyinSyllable("zhui", Tone.FIRST) to "zhuī",
            PinyinSyllable("liu", Tone.FIRST) to "liū",
            PinyinSyllable("nü", Tone.THIRD) to "nǚ",
            PinyinSyllable("nu:", Tone.THIRD) to "nǚ",
            PinyinSyllable("nv", Tone.THIRD) to "nǚ",
            PinyinSyllable("r", Tone.FIFTH) to "r",
            PinyinSyllable("er", Tone.SECOND) to "ér",
            PinyinSyllable("Er", Tone.SECOND) to "Ér",
        ) { (input, expected) ->
            val formatted = input.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS)
            formatted shouldBe expected
        }
    }

    context("format to zhuyin with invalid pinyin syllable") {
        // No cases yet.
    }

    context("format to pinyin with tone diacritics with invalid pinyin syllable") {
        // No cases yet.
    }

    context("format to pinyin with tone numbers with invalid pinyin syllable") {
        // No cases yet.
    }
})
|
@ -1,6 +1,5 @@
|
|||||||
package com.marvinelsen.chinese.phonetics
|
package com.marvinelsen.chinese.transliteration.api
|
||||||
|
|
||||||
import com.marvinelsen.chinese.phonetics.exceptions.InvalidToneInputException
|
|
||||||
import io.kotest.assertions.throwables.shouldThrow
|
import io.kotest.assertions.throwables.shouldThrow
|
||||||
import io.kotest.core.spec.style.ShouldSpec
|
import io.kotest.core.spec.style.ShouldSpec
|
||||||
import io.kotest.datatest.withData
|
import io.kotest.datatest.withData
|
||||||
@ -14,7 +13,7 @@ class ToneTest : ShouldSpec({
|
|||||||
'1' to Tone.FIRST,
|
'1' to Tone.FIRST,
|
||||||
'2' to Tone.SECOND,
|
'2' to Tone.SECOND,
|
||||||
'3' to Tone.THIRD,
|
'3' to Tone.THIRD,
|
||||||
'4' to Tone.FOURTH,
|
'4' to Tone.FORTH,
|
||||||
'5' to Tone.FIFTH,
|
'5' to Tone.FIFTH,
|
||||||
) { (digit, expectedTone) ->
|
) { (digit, expectedTone) ->
|
||||||
Tone.fromDigit(digit) shouldBe expectedTone
|
Tone.fromDigit(digit) shouldBe expectedTone
|
||||||
@ -27,7 +26,7 @@ class ToneTest : ShouldSpec({
|
|||||||
1 to Tone.FIRST,
|
1 to Tone.FIRST,
|
||||||
2 to Tone.SECOND,
|
2 to Tone.SECOND,
|
||||||
3 to Tone.THIRD,
|
3 to Tone.THIRD,
|
||||||
4 to Tone.FOURTH,
|
4 to Tone.FORTH,
|
||||||
5 to Tone.FIFTH,
|
5 to Tone.FIFTH,
|
||||||
) { (number, expectedTone) ->
|
) { (number, expectedTone) ->
|
||||||
Tone.fromInt(number) shouldBe expectedTone
|
Tone.fromInt(number) shouldBe expectedTone
|
||||||
@ -37,25 +36,12 @@ class ToneTest : ShouldSpec({
|
|||||||
context("convert correctly from Zhuyin tone mark") {
|
context("convert correctly from Zhuyin tone mark") {
|
||||||
withData(
|
withData(
|
||||||
nameFn = { "'${it.first}' -> ${it.second}" },
|
nameFn = { "'${it.first}' -> ${it.second}" },
|
||||||
'¯' to Tone.FIRST,
|
|
||||||
'ˊ' to Tone.SECOND,
|
'ˊ' to Tone.SECOND,
|
||||||
'ˇ' to Tone.THIRD,
|
'ˇ' to Tone.THIRD,
|
||||||
'ˋ' to Tone.FOURTH,
|
'ˋ' to Tone.FORTH,
|
||||||
'˙' to Tone.FIFTH,
|
'˙' to Tone.FIFTH,
|
||||||
) { (zhuyinToneMark, expectedTone) ->
|
) { (zhuyinToneMark, expectedTone) ->
|
||||||
Tone.fromZhuyinTone(zhuyinToneMark) shouldBe expectedTone
|
Tone.fromZhuyinToneMark(zhuyinToneMark) shouldBe expectedTone
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
context("convert correctly from Pinyin tone mark") {
|
|
||||||
withData(
|
|
||||||
nameFn = { "'${it.first}' -> ${it.second}" },
|
|
||||||
'\u0304' to Tone.FIRST,
|
|
||||||
'\u0301' to Tone.SECOND,
|
|
||||||
'\u030C' to Tone.THIRD,
|
|
||||||
'\u0300' to Tone.FOURTH,
|
|
||||||
) { (pinyinToneMark, expectedTone) ->
|
|
||||||
Tone.fromPinyinTone(pinyinToneMark) shouldBe expectedTone
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -65,52 +51,13 @@ class ToneTest : ShouldSpec({
|
|||||||
Tone.FIRST to 1,
|
Tone.FIRST to 1,
|
||||||
Tone.SECOND to 2,
|
Tone.SECOND to 2,
|
||||||
Tone.THIRD to 3,
|
Tone.THIRD to 3,
|
||||||
Tone.FOURTH to 4,
|
Tone.FORTH to 4,
|
||||||
Tone.FIFTH to 5,
|
Tone.FIFTH to 5,
|
||||||
) { (tone, expectedInteger) ->
|
) { (tone, expectedInteger) ->
|
||||||
tone.toInt() shouldBe expectedInteger
|
tone.toInt() shouldBe expectedInteger
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
context("convert correctly to Zhuyin tone mark") {
|
|
||||||
withData(
|
|
||||||
nameFn = { "${it.first} -> '${it.second}'" },
|
|
||||||
Tone.FIRST to "",
|
|
||||||
Tone.SECOND to "ˊ",
|
|
||||||
Tone.THIRD to "ˇ",
|
|
||||||
Tone.FOURTH to "ˋ",
|
|
||||||
Tone.FIFTH to "˙",
|
|
||||||
) { (tone, zhuyinTone) ->
|
|
||||||
tone.toZhuyinTone() shouldBe zhuyinTone
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
context("convert correctly to Pinyin tone mark") {
|
|
||||||
withData(
|
|
||||||
nameFn = { "${it.first} -> '${it.second}'" },
|
|
||||||
Tone.FIRST to "\u0304",
|
|
||||||
Tone.SECOND to "\u0301",
|
|
||||||
Tone.THIRD to "\u030C",
|
|
||||||
Tone.FOURTH to "\u0300",
|
|
||||||
Tone.FIFTH to "",
|
|
||||||
) { (tone, pinyinTone) ->
|
|
||||||
tone.toPinyinTone() shouldBe pinyinTone
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
context("convert correctly to Pinyin number") {
|
|
||||||
withData(
|
|
||||||
nameFn = { "${it.first} -> '${it.second}'" },
|
|
||||||
Tone.FIRST to "1",
|
|
||||||
Tone.SECOND to "2",
|
|
||||||
Tone.THIRD to "3",
|
|
||||||
Tone.FOURTH to "4",
|
|
||||||
Tone.FIFTH to "5",
|
|
||||||
) { (tone, pinyinNumber) ->
|
|
||||||
tone.toPinyinNumber() shouldBe pinyinNumber
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
context("return null when converting from invalid digit ") {
|
context("return null when converting from invalid digit ") {
|
||||||
withData(
|
withData(
|
||||||
nameFn = { "'$it' -> null" },
|
nameFn = { "'$it' -> null" },
|
||||||
@ -127,7 +74,7 @@ class ToneTest : ShouldSpec({
|
|||||||
|
|
||||||
context("return null when converting from invalid int ") {
|
context("return null when converting from invalid int ") {
|
||||||
withData(
|
withData(
|
||||||
nameFn = { "'$it' -> null" },
|
nameFn = { "'$it' -> throws exception" },
|
||||||
0,
|
0,
|
||||||
6,
|
6,
|
||||||
-1,
|
-1,
|
||||||
@ -140,7 +87,7 @@ class ToneTest : ShouldSpec({
|
|||||||
|
|
||||||
context("return null when converting from invalid Zhuyin tone mark ") {
|
context("return null when converting from invalid Zhuyin tone mark ") {
|
||||||
withData(
|
withData(
|
||||||
nameFn = { "'$it' -> null" },
|
nameFn = { "'$it' -> throws exception" },
|
||||||
'0',
|
'0',
|
||||||
'6',
|
'6',
|
||||||
'a',
|
'a',
|
||||||
@ -148,21 +95,7 @@ class ToneTest : ShouldSpec({
|
|||||||
'$',
|
'$',
|
||||||
'*',
|
'*',
|
||||||
) { invalidZhuyinToneMark ->
|
) { invalidZhuyinToneMark ->
|
||||||
Tone.fromZhuyinToneOrNull(invalidZhuyinToneMark).shouldBeNull()
|
Tone.fromZhuyinToneMarkOrNull(invalidZhuyinToneMark).shouldBeNull()
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
context("return null when converting from invalid Pinyin tone mark ") {
|
|
||||||
withData(
|
|
||||||
nameFn = { "'$it' -> null" },
|
|
||||||
'0',
|
|
||||||
'6',
|
|
||||||
'a',
|
|
||||||
'z',
|
|
||||||
'$',
|
|
||||||
'*',
|
|
||||||
) { invalidPinyinToneMark ->
|
|
||||||
Tone.fromPinyinToneOrNull(invalidPinyinToneMark).shouldBeNull()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -176,7 +109,7 @@ class ToneTest : ShouldSpec({
|
|||||||
'$',
|
'$',
|
||||||
'*',
|
'*',
|
||||||
) { invalidDigit ->
|
) { invalidDigit ->
|
||||||
shouldThrow<InvalidToneInputException> {
|
shouldThrow<IllegalArgumentException> {
|
||||||
Tone.fromDigit(invalidDigit)
|
Tone.fromDigit(invalidDigit)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -191,7 +124,7 @@ class ToneTest : ShouldSpec({
|
|||||||
Int.MAX_VALUE,
|
Int.MAX_VALUE,
|
||||||
Int.MIN_VALUE,
|
Int.MIN_VALUE,
|
||||||
) { invalidNumber ->
|
) { invalidNumber ->
|
||||||
shouldThrow<InvalidToneInputException> {
|
shouldThrow<IllegalArgumentException> {
|
||||||
Tone.fromInt(invalidNumber)
|
Tone.fromInt(invalidNumber)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -207,25 +140,48 @@ class ToneTest : ShouldSpec({
|
|||||||
'$',
|
'$',
|
||||||
'*',
|
'*',
|
||||||
) { invalidZhuyinToneMark ->
|
) { invalidZhuyinToneMark ->
|
||||||
shouldThrow<InvalidToneInputException> {
|
shouldThrow<IllegalArgumentException> {
|
||||||
Tone.fromZhuyinTone(invalidZhuyinToneMark)
|
Tone.fromZhuyinToneMark(invalidZhuyinToneMark)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
context("throw exception when converting from invalid Pinyin tone mark ") {
|
context("format to Zhuyin correctly") {
|
||||||
withData(
|
withData(
|
||||||
nameFn = { "'$it' -> throws exception" },
|
nameFn = { "${it.first} -> '${it.second}'" },
|
||||||
'0',
|
Tone.FIRST to "",
|
||||||
'6',
|
Tone.SECOND to "ˊ",
|
||||||
'a',
|
Tone.THIRD to "ˇ",
|
||||||
'z',
|
Tone.FORTH to "ˋ",
|
||||||
'$',
|
Tone.FIFTH to "˙",
|
||||||
'*',
|
) { (tone, expectedZhuyinToneMark) ->
|
||||||
) { invalidPinyinToneMark ->
|
tone.format(TransliterationSystem.ZHUYIN) shouldBe expectedZhuyinToneMark
|
||||||
shouldThrow<InvalidToneInputException> {
|
}
|
||||||
Tone.fromPinyinTone(invalidPinyinToneMark)
|
}
|
||||||
}
|
|
||||||
|
context("format to Pinyin with tone numbers correctly") {
|
||||||
|
withData(
|
||||||
|
nameFn = { "${it.first} -> '${it.second}'" },
|
||||||
|
Tone.FIRST to "1",
|
||||||
|
Tone.SECOND to "2",
|
||||||
|
Tone.THIRD to "3",
|
||||||
|
Tone.FORTH to "4",
|
||||||
|
Tone.FIFTH to "5",
|
||||||
|
) { (tone, expectedNumber) ->
|
||||||
|
tone.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS) shouldBe expectedNumber
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
context("format to Pinyin with tone marks correctly") {
|
||||||
|
withData(
|
||||||
|
nameFn = { "${it.first} -> '${it.second}'" },
|
||||||
|
Tone.FIRST to "\u0304",
|
||||||
|
Tone.SECOND to "\u0301",
|
||||||
|
Tone.THIRD to "\u030C",
|
||||||
|
Tone.FORTH to "\u0300",
|
||||||
|
Tone.FIFTH to "",
|
||||||
|
) { (tone, expectedAccent) ->
|
||||||
|
tone.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS) shouldBe expectedAccent
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})
|
})
|
Loading…
x
Reference in New Issue
Block a user