This commit is contained in:
commit
2b116b92e0
24
.gitea/workflows/publish.yaml
Normal file
24
.gitea/workflows/publish.yaml
Normal file
@ -0,0 +1,24 @@
|
||||
name: Publish package
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout sources
|
||||
uses: actions/checkout@v4
|
||||
- name: Setup Java
|
||||
uses: actions/setup-java@v4
|
||||
with:
|
||||
distribution: 'temurin'
|
||||
java-version: 21
|
||||
- name: Setup Gradle
|
||||
uses: gradle/actions/setup-gradle@v4
|
||||
- name: Publish
|
||||
run: ./gradlew publish
|
||||
env:
|
||||
GITEA_TOKEN: ${{ secrets.PACKAGE_TOKEN }}
|
26
.gitea/workflows/pull-request.yaml
Normal file
26
.gitea/workflows/pull-request.yaml
Normal file
@ -0,0 +1,26 @@
|
||||
name: Pull Request
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout sources
|
||||
uses: actions/checkout@v4
|
||||
- name: Setup Java
|
||||
uses: actions/setup-java@v4
|
||||
with:
|
||||
distribution: 'temurin'
|
||||
java-version: 21
|
||||
- name: Setup Gradle
|
||||
uses: gradle/actions/setup-gradle@v4
|
||||
- name: Lint
|
||||
run: ./gradlew detekt
|
||||
- name: Build
|
||||
run: ./gradlew build testClasses -x check
|
||||
- name: Test
|
||||
run: ./gradlew test
|
42
.gitignore
vendored
Normal file
42
.gitignore
vendored
Normal file
@ -0,0 +1,42 @@
|
||||
.gradle
|
||||
build/
|
||||
!gradle/wrapper/gradle-wrapper.jar
|
||||
!**/src/main/**/build/
|
||||
!**/src/test/**/build/
|
||||
|
||||
### IntelliJ IDEA ###
|
||||
.idea
|
||||
*.iws
|
||||
*.iml
|
||||
*.ipr
|
||||
out/
|
||||
!**/src/main/**/out/
|
||||
!**/src/test/**/out/
|
||||
|
||||
### Kotlin ###
|
||||
.kotlin
|
||||
|
||||
### Eclipse ###
|
||||
.apt_generated
|
||||
.classpath
|
||||
.factorypath
|
||||
.project
|
||||
.settings
|
||||
.springBeans
|
||||
.sts4-cache
|
||||
bin/
|
||||
!**/src/main/**/bin/
|
||||
!**/src/test/**/bin/
|
||||
|
||||
### NetBeans ###
|
||||
/nbproject/private/
|
||||
/nbbuild/
|
||||
/dist/
|
||||
/nbdist/
|
||||
/.nb-gradle/
|
||||
|
||||
### VS Code ###
|
||||
.vscode/
|
||||
|
||||
### Mac OS ###
|
||||
.DS_Store
|
19
LICENSE
Normal file
19
LICENSE
Normal file
@ -0,0 +1,19 @@
|
||||
Copyright (c) 2024 Marvin Elsen
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
OR OTHER DEALINGS IN THE SOFTWARE.
|
45
README.md
Normal file
45
README.md
Normal file
@ -0,0 +1,45 @@
|
||||
# Pinyin Mapper for Kotlin
|
||||
|
||||
A Pinyin Mapper written in [Kotlin](https://kotlinlang.org).
|
||||
|
||||
## Installation
|
||||
|
||||
_Pinyin Mapper for Kotlin_ is available
|
||||
from [my self-hosted Gitea instance](https://gitea.marvinelsen.com/marvinelsen/cedict-parser).
|
||||
|
||||
First, add the repository to your `build.gradle.kts` file:
|
||||
|
||||
```kotlin
|
||||
repositories {
|
||||
maven {
|
||||
url = uri("https://gitea.marvinelsen.com/api/packages/marvinelsen/maven")
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Afterwards, add the package dependency to your `build.gradle.kts` file:
|
||||
|
||||
```kotlin
|
||||
dependencies {
|
||||
implementation("com.marvinelsen:pinyin:1.0-SNAPSHOT")
|
||||
}
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```kotlin
|
||||
fun main() {
|
||||
TODO()
|
||||
}
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
All source code in this repository is licensed under an [MIT license](LICENSE), unless otherwise noted.
|
||||
|
||||
Different licenses apply to the following third-party code, data, and files in this repository:
|
||||
|
||||
### CC-CEDICT
|
||||
|
||||
[CC-CEDICT](https://cc-cedict.org/wiki) is licensed under
|
||||
a [Creative Commons Attribution-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-sa/4.0/).
|
60
build.gradle.kts
Normal file
60
build.gradle.kts
Normal file
@ -0,0 +1,60 @@
|
||||
// Gradle build script: compiles the library with the Kotlin JVM plugin, lints it with detekt,
// runs the Kotest suite on the JUnit Platform and publishes the artifact to a Gitea Maven registry.

plugins {
    alias(libs.plugins.kotlin.jvm)
    alias(libs.plugins.detekt)
    `maven-publish`
}

group = "com.marvinelsen"
version = "1.0-SNAPSHOT"

repositories {
    mavenCentral()
}

dependencies {
    detektPlugins(libs.detekt.formatting)

    testImplementation(libs.kotest.core)
    testImplementation(libs.kotest.assertions)
    testImplementation(libs.kotest.data)
}

tasks.test {
    // The Kotest runner declared above is a JUnit 5 (JUnit Platform) engine.
    useJUnitPlatform()
}

kotlin {
    jvmToolchain(21)
}

publishing {
    publications {
        create<MavenPublication>("maven") {
            // `group` and `version` are typed as `Any` on Project; toString() avoids an unchecked cast.
            groupId = project.group.toString()
            artifactId = "pinyin"
            version = project.version.toString()

            from(components["java"])
        }
    }
    repositories {
        maven {
            name = "Gitea"
            // The registry is reached over HTTPS only. Insecure (plain HTTP) access is deliberately
            // NOT allowed here, because the request carries the access token in a header.
            url = uri("https://gitea.marvinelsen.com/api/packages/marvinelsen/maven")
            credentials(HttpHeaderCredentials::class) {
                name = "Authorization"
                // The token is read from the GITEA_TOKEN environment variable when the build is
                // configured. Builds that do not publish may leave it unset.
                value = "token ${System.getenv("GITEA_TOKEN")}"
            }
            authentication {
                val header by registering(HttpHeaderAuthentication::class)
            }
        }
    }
}

detekt {
    buildUponDefaultConfig = true
    allRules = false
    autoCorrect = true
}
|
1
gradle.properties
Normal file
1
gradle.properties
Normal file
@ -0,0 +1 @@
|
||||
kotlin.code.style=official
|
23
gradle/libs.versions.toml
Normal file
23
gradle/libs.versions.toml
Normal file
@ -0,0 +1,23 @@
|
||||
[versions]
|
||||
kotlin = "2.0.20"
|
||||
kotest = "5.9.1"
|
||||
detekt = "1.23.7"
|
||||
|
||||
[libraries]
|
||||
# Kotest
|
||||
# See: https://kotest.io
|
||||
kotest-core = { module = "io.kotest:kotest-runner-junit5", version.ref = "kotest" }
|
||||
kotest-assertions = { module = "io.kotest:kotest-assertions-core", version.ref = "kotest" }
|
||||
kotest-data = { module = "io.kotest:kotest-framework-datatest", version.ref = "kotest" }
|
||||
|
||||
# Detekt
|
||||
# See: https://detekt.dev
|
||||
detekt-formatting = { module = "io.gitlab.arturbosch.detekt:detekt-formatting", version.ref = "detekt" }
|
||||
|
||||
[plugins]
|
||||
# Kotlin
|
||||
# See: https://plugins.gradle.org/plugin/org.jetbrains.kotlin.jvm
|
||||
kotlin-jvm = { id = "org.jetbrains.kotlin.jvm", version.ref = "kotlin" }
|
||||
# Detekt
|
||||
# See: https://detekt.dev
|
||||
detekt = { id = "io.gitlab.arturbosch.detekt", version.ref = "detekt" }
|
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
Normal file
BIN
gradle/wrapper/gradle-wrapper.jar
vendored
Normal file
Binary file not shown.
7
gradle/wrapper/gradle-wrapper.properties
vendored
Normal file
7
gradle/wrapper/gradle-wrapper.properties
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
distributionBase=GRADLE_USER_HOME
|
||||
distributionPath=wrapper/dists
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip
|
||||
networkTimeout=10000
|
||||
validateDistributionUrl=true
|
||||
zipStoreBase=GRADLE_USER_HOME
|
||||
zipStorePath=wrapper/dists
|
252
gradlew
vendored
Executable file
252
gradlew
vendored
Executable file
@ -0,0 +1,252 @@
|
||||
#!/bin/sh
|
||||
|
||||
#
|
||||
# Copyright © 2015-2021 the original authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
##############################################################################
|
||||
#
|
||||
# Gradle start up script for POSIX generated by Gradle.
|
||||
#
|
||||
# Important for running:
|
||||
#
|
||||
# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
|
||||
# noncompliant, but you have some other compliant shell such as ksh or
|
||||
# bash, then to run this script, type that shell name before the whole
|
||||
# command line, like:
|
||||
#
|
||||
# ksh Gradle
|
||||
#
|
||||
# Busybox and similar reduced shells will NOT work, because this script
|
||||
# requires all of these POSIX shell features:
|
||||
# * functions;
|
||||
# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
|
||||
# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
|
||||
# * compound commands having a testable exit status, especially «case»;
|
||||
# * various built-in commands including «command», «set», and «ulimit».
|
||||
#
|
||||
# Important for patching:
|
||||
#
|
||||
# (2) This script targets any POSIX shell, so it avoids extensions provided
|
||||
# by Bash, Ksh, etc; in particular arrays are avoided.
|
||||
#
|
||||
# The "traditional" practice of packing multiple parameters into a
|
||||
# space-separated string is a well documented source of bugs and security
|
||||
# problems, so this is (mostly) avoided, by progressively accumulating
|
||||
# options in "$@", and eventually passing that to Java.
|
||||
#
|
||||
# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
|
||||
# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
|
||||
# see the in-line comments for details.
|
||||
#
|
||||
# There are tweaks for specific operating systems such as AIX, CygWin,
|
||||
# Darwin, MinGW, and NonStop.
|
||||
#
|
||||
# (3) This script is generated from the Groovy template
|
||||
# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
|
||||
# within the Gradle project.
|
||||
#
|
||||
# You can find Gradle at https://github.com/gradle/gradle/.
|
||||
#
|
||||
##############################################################################
|
||||
|
||||
# Attempt to set APP_HOME
|
||||
|
||||
# Resolve links: $0 may be a link
|
||||
app_path=$0
|
||||
|
||||
# Need this for daisy-chained symlinks.
|
||||
while
|
||||
APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
|
||||
[ -h "$app_path" ]
|
||||
do
|
||||
ls=$( ls -ld "$app_path" )
|
||||
link=${ls#*' -> '}
|
||||
case $link in #(
|
||||
/*) app_path=$link ;; #(
|
||||
*) app_path=$APP_HOME$link ;;
|
||||
esac
|
||||
done
|
||||
|
||||
# This is normally unused
|
||||
# shellcheck disable=SC2034
|
||||
APP_BASE_NAME=${0##*/}
|
||||
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
|
||||
APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s
|
||||
' "$PWD" ) || exit
|
||||
|
||||
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
||||
MAX_FD=maximum
|
||||
|
||||
warn () {
|
||||
echo "$*"
|
||||
} >&2
|
||||
|
||||
die () {
|
||||
echo
|
||||
echo "$*"
|
||||
echo
|
||||
exit 1
|
||||
} >&2
|
||||
|
||||
# OS specific support (must be 'true' or 'false').
|
||||
cygwin=false
|
||||
msys=false
|
||||
darwin=false
|
||||
nonstop=false
|
||||
case "$( uname )" in #(
|
||||
CYGWIN* ) cygwin=true ;; #(
|
||||
Darwin* ) darwin=true ;; #(
|
||||
MSYS* | MINGW* ) msys=true ;; #(
|
||||
NONSTOP* ) nonstop=true ;;
|
||||
esac
|
||||
|
||||
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
||||
|
||||
|
||||
# Determine the Java command to use to start the JVM.
|
||||
if [ -n "$JAVA_HOME" ] ; then
|
||||
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
||||
# IBM's JDK on AIX uses strange locations for the executables
|
||||
JAVACMD=$JAVA_HOME/jre/sh/java
|
||||
else
|
||||
JAVACMD=$JAVA_HOME/bin/java
|
||||
fi
|
||||
if [ ! -x "$JAVACMD" ] ; then
|
||||
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
location of your Java installation."
|
||||
fi
|
||||
else
|
||||
JAVACMD=java
|
||||
if ! command -v java >/dev/null 2>&1
|
||||
then
|
||||
die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
location of your Java installation."
|
||||
fi
|
||||
fi
|
||||
|
||||
# Increase the maximum file descriptors if we can.
|
||||
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
|
||||
case $MAX_FD in #(
|
||||
max*)
|
||||
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
|
||||
# shellcheck disable=SC2039,SC3045
|
||||
MAX_FD=$( ulimit -H -n ) ||
|
||||
warn "Could not query maximum file descriptor limit"
|
||||
esac
|
||||
case $MAX_FD in #(
|
||||
'' | soft) :;; #(
|
||||
*)
|
||||
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
|
||||
# shellcheck disable=SC2039,SC3045
|
||||
ulimit -n "$MAX_FD" ||
|
||||
warn "Could not set maximum file descriptor limit to $MAX_FD"
|
||||
esac
|
||||
fi
|
||||
|
||||
# Collect all arguments for the java command, stacking in reverse order:
|
||||
# * args from the command line
|
||||
# * the main class name
|
||||
# * -classpath
|
||||
# * -D...appname settings
|
||||
# * --module-path (only if needed)
|
||||
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
|
||||
|
||||
# For Cygwin or MSYS, switch paths to Windows format before running java
|
||||
if "$cygwin" || "$msys" ; then
|
||||
APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
|
||||
CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
|
||||
|
||||
JAVACMD=$( cygpath --unix "$JAVACMD" )
|
||||
|
||||
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
||||
for arg do
|
||||
if
|
||||
case $arg in #(
|
||||
-*) false ;; # don't mess with options #(
|
||||
/?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
|
||||
[ -e "$t" ] ;; #(
|
||||
*) false ;;
|
||||
esac
|
||||
then
|
||||
arg=$( cygpath --path --ignore --mixed "$arg" )
|
||||
fi
|
||||
# Roll the args list around exactly as many times as the number of
|
||||
# args, so each arg winds up back in the position where it started, but
|
||||
# possibly modified.
|
||||
#
|
||||
# NB: a `for` loop captures its iteration list before it begins, so
|
||||
# changing the positional parameters here affects neither the number of
|
||||
# iterations, nor the values presented in `arg`.
|
||||
shift # remove old arg
|
||||
set -- "$@" "$arg" # push replacement arg
|
||||
done
|
||||
fi
|
||||
|
||||
|
||||
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
|
||||
|
||||
# Collect all arguments for the java command:
|
||||
# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
|
||||
# and any embedded shellness will be escaped.
|
||||
# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
|
||||
# treated as '${Hostname}' itself on the command line.
|
||||
|
||||
set -- \
|
||||
"-Dorg.gradle.appname=$APP_BASE_NAME" \
|
||||
-classpath "$CLASSPATH" \
|
||||
org.gradle.wrapper.GradleWrapperMain \
|
||||
"$@"
|
||||
|
||||
# Stop when "xargs" is not available.
|
||||
if ! command -v xargs >/dev/null 2>&1
|
||||
then
|
||||
die "xargs is not available"
|
||||
fi
|
||||
|
||||
# Use "xargs" to parse quoted args.
|
||||
#
|
||||
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
|
||||
#
|
||||
# In Bash we could simply go:
|
||||
#
|
||||
# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
|
||||
# set -- "${ARGS[@]}" "$@"
|
||||
#
|
||||
# but POSIX shell has neither arrays nor command substitution, so instead we
|
||||
# post-process each arg (as a line of input to sed) to backslash-escape any
|
||||
# character that might be a shell metacharacter, then use eval to reverse
|
||||
# that process (while maintaining the separation between arguments), and wrap
|
||||
# the whole thing up as a single "set" statement.
|
||||
#
|
||||
# This will of course break if any of these variables contains a newline or
|
||||
# an unmatched quote.
|
||||
#
|
||||
|
||||
eval "set -- $(
|
||||
printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
|
||||
xargs -n1 |
|
||||
sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
|
||||
tr '\n' ' '
|
||||
)" '"$@"'
|
||||
|
||||
exec "$JAVACMD" "$@"
|
94
gradlew.bat
vendored
Normal file
94
gradlew.bat
vendored
Normal file
@ -0,0 +1,94 @@
|
||||
@rem
|
||||
@rem Copyright 2015 the original author or authors.
|
||||
@rem
|
||||
@rem Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@rem you may not use this file except in compliance with the License.
|
||||
@rem You may obtain a copy of the License at
|
||||
@rem
|
||||
@rem https://www.apache.org/licenses/LICENSE-2.0
|
||||
@rem
|
||||
@rem Unless required by applicable law or agreed to in writing, software
|
||||
@rem distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@rem See the License for the specific language governing permissions and
|
||||
@rem limitations under the License.
|
||||
@rem
|
||||
@rem SPDX-License-Identifier: Apache-2.0
|
||||
@rem
|
||||
|
||||
@if "%DEBUG%"=="" @echo off
|
||||
@rem ##########################################################################
|
||||
@rem
|
||||
@rem Gradle startup script for Windows
|
||||
@rem
|
||||
@rem ##########################################################################
|
||||
|
||||
@rem Set local scope for the variables with windows NT shell
|
||||
if "%OS%"=="Windows_NT" setlocal
|
||||
|
||||
set DIRNAME=%~dp0
|
||||
if "%DIRNAME%"=="" set DIRNAME=.
|
||||
@rem This is normally unused
|
||||
set APP_BASE_NAME=%~n0
|
||||
set APP_HOME=%DIRNAME%
|
||||
|
||||
@rem Resolve any "." and ".." in APP_HOME to make it shorter.
|
||||
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
|
||||
|
||||
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
|
||||
|
||||
@rem Find java.exe
|
||||
if defined JAVA_HOME goto findJavaFromJavaHome
|
||||
|
||||
set JAVA_EXE=java.exe
|
||||
%JAVA_EXE% -version >NUL 2>&1
|
||||
if %ERRORLEVEL% equ 0 goto execute
|
||||
|
||||
echo. 1>&2
|
||||
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2
|
||||
echo. 1>&2
|
||||
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
|
||||
echo location of your Java installation. 1>&2
|
||||
|
||||
goto fail
|
||||
|
||||
:findJavaFromJavaHome
|
||||
set JAVA_HOME=%JAVA_HOME:"=%
|
||||
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
||||
|
||||
if exist "%JAVA_EXE%" goto execute
|
||||
|
||||
echo. 1>&2
|
||||
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2
|
||||
echo. 1>&2
|
||||
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
|
||||
echo location of your Java installation. 1>&2
|
||||
|
||||
goto fail
|
||||
|
||||
:execute
|
||||
@rem Setup the command line
|
||||
|
||||
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
||||
|
||||
|
||||
@rem Execute Gradle
|
||||
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
|
||||
|
||||
:end
|
||||
@rem End local scope for the variables with windows NT shell
|
||||
if %ERRORLEVEL% equ 0 goto mainEnd
|
||||
|
||||
:fail
|
||||
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
||||
rem the _cmd.exe /c_ return code!
|
||||
set EXIT_CODE=%ERRORLEVEL%
|
||||
if %EXIT_CODE% equ 0 set EXIT_CODE=1
|
||||
if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
|
||||
exit /b %EXIT_CODE%
|
||||
|
||||
:mainEnd
|
||||
if "%OS%"=="Windows_NT" endlocal
|
||||
|
||||
:omega
|
1
settings.gradle.kts
Normal file
1
settings.gradle.kts
Normal file
@ -0,0 +1 @@
|
||||
rootProject.name = "pinyin"
|
@ -0,0 +1,110 @@
|
||||
package com.marvinelsen.chinese.transliteration
|
||||
|
||||
import java.io.InputStream
|
||||
|
||||
/**
 * A single syllable, stored as a toneless Pinyin spelling together with its [Tone].
 *
 * Instances are normally created through [fromPinyinWithToneNumber] or [fromZhuyin], which validate
 * their input against the bundled Pinyin/Zhuyin transcription table. The primary constructor performs
 * no validation; an instance built directly from an unknown spelling makes [format] throw an
 * [IllegalStateException].
 *
 * @property pinyinSyllableWithoutTone the Pinyin spelling without any tone information; the original
 * letter case is kept, lookups in the transcription table are done on the lowercased spelling.
 * @property tone the tone of the syllable.
 */
data class Syllable(
    val pinyinSyllableWithoutTone: String,
    val tone: Tone,
) {
    companion object {
        private const val TRANSCRIPTIONS_RESOURCE = "/pinyin_zhuyin_transcriptions.tsv"

        /** Toneless Pinyin spelling -> toneless Zhuyin spelling, loaded once from the classpath. */
        private val pinyinToZhuyin: Map<String, String> = parseTranscriptions(
            checkNotNull(this::class.java.getResourceAsStream(TRANSCRIPTIONS_RESOURCE)) {
                "Resource '$TRANSCRIPTIONS_RESOURCE' was not found on the classpath."
            }
        )

        /**
         * Reverse lookup of [pinyinToZhuyin]. When several Pinyin spellings share one Zhuyin
         * spelling, the entry that appears last in the table wins.
         */
        private val zhuyinToPinyin: Map<String, String> =
            pinyinToZhuyin.entries.associate { it.value to it.key }

        private val zhuyinToneMarkRegex = """[ˊˇˋ˙]""".toRegex()

        /**
         * Parses a Pinyin syllable that ends in a tone number, e.g. `"ma3"`.
         *
         * @param pinyinWithToneNumber the toneless spelling immediately followed by a digit 1..5.
         * @return the parsed syllable; the spelling keeps the letter case of the input.
         * @throws IllegalArgumentException if the input is empty, does not end in a digit, the digit
         * is outside 1..5, or the spelling is not in the transcription table.
         */
        fun fromPinyinWithToneNumber(pinyinWithToneNumber: String): Syllable {
            // Guard first: the slicing below would otherwise fail with an index error on "".
            require(pinyinWithToneNumber.isNotEmpty()) {
                "Expected a Pinyin syllable followed by a tone number, but the input was empty."
            }

            val pinyinWithoutNumber = pinyinWithToneNumber.dropLast(1)
            val lastCharacter = pinyinWithToneNumber.last()

            require(lastCharacter.isDigit()) {
                "'$pinyinWithToneNumber' is not a valid Pinyin with tone number syllable. Expected the last character to be a digit, but was '$lastCharacter'"
            }
            // Resolve the tone from the numeric value that was validated, so the range check and
            // the resulting Tone can never disagree.
            val tone = requireNotNull(Tone.fromIntOrNull(lastCharacter.digitToInt())) {
                "'$pinyinWithToneNumber' is not a valid Pinyin with tone number syllable. Expected the tone number 'n' to be in range 1 <= n <= 5, but was '$lastCharacter'"
            }
            require(pinyinWithoutNumber.lowercase() in pinyinToZhuyin) {
                "'$pinyinWithoutNumber' is not a valid Pinyin syllable."
            }

            return Syllable(
                pinyinSyllableWithoutTone = pinyinWithoutNumber,
                tone = tone
            )
        }

        /**
         * Parses a Zhuyin syllable with an optional tone mark.
         *
         * The tone mark is read from the last character, or failing that from the first character.
         * A syllable without any tone mark is treated as [Tone.FIRST].
         *
         * @param zhuyin the Zhuyin spelling, optionally carrying one tone mark at its start or end.
         * @return the syllable with the Pinyin spelling taken from the transcription table.
         * @throws IllegalArgumentException if the spelling without tone marks is not in the
         * transcription table, or if it carries more than one tone mark or a tone mark that is
         * neither the first nor the last character.
         */
        fun fromZhuyin(zhuyin: String): Syllable {
            val zhuyinWithoutToneMark = zhuyin.replace(zhuyinToneMarkRegex, "")

            require(zhuyinWithoutToneMark in zhuyinToPinyin) { "'$zhuyin' is not a valid Zhuyin syllable." }

            // Every tone mark is a single UTF-16 code unit, so the length difference is the number
            // of marks that the regex removed.
            val toneMarkCount = zhuyin.length - zhuyinWithoutToneMark.length
            val markedTone = zhuyin.lastOrNull()?.let { Tone.fromZhuyinToneMarkOrNull(it) }
                ?: zhuyin.firstOrNull()?.let { Tone.fromZhuyinToneMarkOrNull(it) }

            // Reject marks that were stripped above but would otherwise be silently ignored
            // (a mark in the middle of the syllable, or several marks at once).
            require(toneMarkCount == 0 || (toneMarkCount == 1 && markedTone != null)) {
                "'$zhuyin' is not a valid Zhuyin syllable. " +
                    "Expected at most one tone mark, placed at the start or the end of the syllable."
            }

            return Syllable(
                zhuyinToPinyin.getValue(zhuyinWithoutToneMark),
                markedTone ?: Tone.FIRST
            )
        }

        /**
         * Reads the tab-separated transcription table: first column is the Pinyin spelling, second
         * column the Zhuyin spelling. Blank lines are skipped. The stream is closed afterwards.
         *
         * @throws IllegalStateException if a non-blank line has fewer than two columns.
         */
        private fun parseTranscriptions(inputStream: InputStream): Map<String, String> =
            inputStream.bufferedReader().useLines { lines ->
                lines.filter { it.isNotBlank() }
                    .map { it.split('\t') }
                    .associate { columns ->
                        check(columns.size >= 2) {
                            "Malformed transcription line, expected two tab-separated columns: '${columns.joinToString("\t")}'"
                        }
                        columns[0] to columns[1]
                    }
            }
    }

    /**
     * Renders this syllable in the given [transliterationSystem].
     *
     * @throws IllegalStateException if [pinyinSyllableWithoutTone] is not in the transcription table,
     * or if a tone mark has to be placed but the spelling contains no vowel.
     */
    fun format(transliterationSystem: TransliterationSystem) = when (transliterationSystem) {
        TransliterationSystem.ZHUYIN -> formatToZhuyin()
        TransliterationSystem.PINYIN_WITH_TONE_NUMBERS -> formatToPinyinWithToneNumbers()
        TransliterationSystem.PINYIN_WITH_TONE_MARKS -> formatToPinyinWithToneMarks()
    }

    /** Zhuyin spelling with the tone mark appended, except for [Tone.FIFTH] where it is prepended. */
    private fun formatToZhuyin(): String {
        val zhuyinSyllable = pinyinToZhuyin[pinyinSyllableWithoutTone.lowercase()]
            ?: error("$pinyinSyllableWithoutTone is not a valid Pinyin syllable")
        val zhuyinToneMark = tone.format(TransliterationSystem.ZHUYIN)

        return when (tone) {
            Tone.FIRST, Tone.SECOND, Tone.THIRD, Tone.FORTH -> zhuyinSyllable + zhuyinToneMark
            Tone.FIFTH -> zhuyinToneMark + zhuyinSyllable
        }
    }

    /** Pinyin spelling (original letter case) followed by the tone number. */
    private fun formatToPinyinWithToneNumbers(): String {
        check(pinyinSyllableWithoutTone.lowercase() in pinyinToZhuyin) {
            "'$pinyinSyllableWithoutTone' is not a valid Pinyin syllable."
        }

        return pinyinSyllableWithoutTone + tone.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)
    }

    /**
     * Pinyin spelling with the tone written as a combining diacritic directly after the vowel that
     * carries it. `v` and `u:` are rewritten to `ü` first. The vowel is chosen case-insensitively in
     * the order a, o, e, then i/u (in "iu" the u carries the mark), then ü.
     */
    private fun formatToPinyinWithToneMarks(): String {
        check(pinyinSyllableWithoutTone.lowercase() in pinyinToZhuyin) {
            "'$pinyinSyllableWithoutTone' is not a valid Pinyin syllable."
        }

        val sanitizedPinyinSyllableWithoutTone = pinyinSyllableWithoutTone
            .replace("v", "ü").replace("V", "Ü")
            .replace("u:", "ü").replace("U:", "Ü")

        val toneMark = tone.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS)
        // Nothing to place when the tone has no diacritic; returning early also keeps vowel-less
        // table entries (such as "r") formattable in that case.
        if (toneMark.isEmpty()) return sanitizedPinyinSyllableWithoutTone

        // Case-insensitive so that capitalised syllables ("An", "Er") find their vowel as well.
        fun indexOfVowel(vowel: Char): Int? =
            sanitizedPinyinSyllableWithoutTone.lastIndexOf(vowel, ignoreCase = true).takeIf { it >= 0 }

        val vowelIndex = indexOfVowel('a')
            ?: indexOfVowel('o')
            ?: indexOfVowel('e')
            ?: indexOfVowel('i')?.let { indexOfI ->
                val followedByU =
                    sanitizedPinyinSyllableWithoutTone.getOrNull(indexOfI + 1)?.lowercaseChar() == 'u'
                if (followedByU) indexOfI + 1 else indexOfI
            }
            ?: indexOfVowel('u')
            ?: indexOfVowel('ü')
            ?: error("No vowel found in Pinyin syllable '$sanitizedPinyinSyllableWithoutTone'")

        return buildString {
            append(sanitizedPinyinSyllableWithoutTone)
            // A combining mark applies to the character before it, hence "+ 1".
            insert(vowelIndex + 1, toneMark)
        }
    }
}
|
@ -0,0 +1,81 @@
|
||||
package com.marvinelsen.chinese.transliteration
|
||||
|
||||
/**
 * The five tones a syllable can carry, with conversions from and to tone numbers (1..5),
 * digit characters ('1'..'5') and Zhuyin tone marks, plus per-system formatting.
 */
@Suppress("MagicNumber")
enum class Tone {
    FIRST,
    SECOND,
    THIRD,
    FORTH,
    FIFTH;

    companion object {
        /**
         * Returns the tone for a tone number.
         *
         * @throws IllegalArgumentException if [number] is not in 1..5.
         */
        fun fromInt(number: Int): Tone =
            requireNotNull(fromIntOrNull(number)) { "Number $number is not a valid tone" }

        /** Returns the tone for a tone number, or `null` if [number] is not in 1..5. */
        fun fromIntOrNull(number: Int): Tone? = entries.firstOrNull { it.toInt() == number }

        /**
         * Returns the tone for a digit character.
         *
         * @throws IllegalArgumentException if [digit] is not one of '1'..'5'.
         */
        fun fromDigit(digit: Char): Tone =
            requireNotNull(fromDigitOrNull(digit)) { "Digit $digit is not a valid tone" }

        /** Returns the tone for a digit character, or `null` if [digit] is not one of '1'..'5'. */
        fun fromDigitOrNull(digit: Char): Tone? =
            if (digit in '1'..'5') fromIntOrNull(digit - '0') else null

        /**
         * Returns the tone for a Zhuyin tone mark.
         *
         * @throws IllegalArgumentException if [zhuyinToneMark] is not a known tone mark.
         */
        fun fromZhuyinToneMark(zhuyinToneMark: Char): Tone =
            requireNotNull(fromZhuyinToneMarkOrNull(zhuyinToneMark)) {
                "Invalid zhuyin tone mark '$zhuyinToneMark'"
            }

        /**
         * Returns the tone for a Zhuyin tone mark, or `null` if the character is not one.
         * [FIRST] has no mark of its own and is therefore never returned.
         */
        fun fromZhuyinToneMarkOrNull(zhuyinToneMark: Char): Tone? {
            val mark = zhuyinToneMark.toString()
            // FIRST formats to the empty string, which cannot equal a one-character mark.
            return entries.firstOrNull { it.formatToZhuyin() == mark }
        }
    }

    /** The tone number, 1 for [FIRST] through 5 for [FIFTH]. */
    fun toInt(): Int = when (this) {
        FIRST -> 1
        SECOND -> 2
        THIRD -> 3
        FORTH -> 4
        FIFTH -> 5
    }

    /** Renders this tone as the suffix, diacritic or mark used by the given [transliterationSystem]. */
    fun format(transliterationSystem: TransliterationSystem): String =
        when (transliterationSystem) {
            TransliterationSystem.ZHUYIN -> formatToZhuyin()
            TransliterationSystem.PINYIN_WITH_TONE_NUMBERS -> formatToPinyinWithToneNumbers()
            TransliterationSystem.PINYIN_WITH_TONE_MARKS -> formatToPinyinWithToneMarks()
        }

    /** The tone number as a one-character string, "1" through "5". */
    private fun formatToPinyinWithToneNumbers(): String = toInt().toString()

    /** The Unicode combining diacritic for this tone; empty for [FIFTH]. */
    private fun formatToPinyinWithToneMarks(): String = when (this) {
        FIRST -> "\u0304" // combining macron
        SECOND -> "\u0301" // combining acute accent
        THIRD -> "\u030C" // combining caron
        FORTH -> "\u0300" // combining grave accent
        FIFTH -> ""
    }

    /** The Zhuyin tone mark for this tone; empty for [FIRST]. */
    private fun formatToZhuyin(): String = when (this) {
        FIRST -> ""
        SECOND -> "ˊ"
        THIRD -> "ˇ"
        FORTH -> "ˋ"
        FIFTH -> "˙"
    }
}
|
@ -0,0 +1,5 @@
|
||||
package com.marvinelsen.chinese.transliteration
|
||||
|
||||
/** The output notations a syllable or tone can be formatted into. */
enum class TransliterationSystem {
    /** Zhuyin symbols with Zhuyin tone marks. */
    ZHUYIN,

    /** Pinyin with the tone written as a trailing number. */
    PINYIN_WITH_TONE_NUMBERS,

    /** Pinyin with the tone written as a diacritic. */
    PINYIN_WITH_TONE_MARKS,
}
|
@ -0,0 +1,5 @@
|
||||
package com.marvinelsen.chinese.transliteration
|
||||
|
||||
/** Constants related to Zhuyin text. */
object Zhuyin {
    /** Separator string; presumably placed between adjacent Zhuyin syllables (confirm against callers). */
    const val SEPARATOR: String = " "
}
|
434
src/main/resources/pinyin_zhuyin_transcriptions.tsv
Normal file
434
src/main/resources/pinyin_zhuyin_transcriptions.tsv
Normal file
@ -0,0 +1,434 @@
|
||||
a ㄚ
|
||||
ai ㄞ
|
||||
an ㄢ
|
||||
ang ㄤ
|
||||
ao ㄠ
|
||||
ba ㄅㄚ
|
||||
bai ㄅㄞ
|
||||
ban ㄅㄢ
|
||||
bang ㄅㄤ
|
||||
bao ㄅㄠ
|
||||
bei ㄅㄟ
|
||||
ben ㄅㄣ
|
||||
beng ㄅㄥ
|
||||
bi ㄅㄧ
|
||||
biu ㄅㄧㄡ
|
||||
bia ㄅㄧㄚ
|
||||
bian ㄅㄧㄢ
|
||||
biang ㄅㄧㄤ
|
||||
biao ㄅㄧㄠ
|
||||
bie ㄅㄧㄝ
|
||||
bin ㄅㄧㄣ
|
||||
bing ㄅㄧㄥ
|
||||
bo ㄅㄛ
|
||||
bu ㄅㄨ
|
||||
ca ㄘㄚ
|
||||
cai ㄘㄞ
|
||||
can ㄘㄢ
|
||||
cang ㄘㄤ
|
||||
cao ㄘㄠ
|
||||
ce ㄘㄜ
|
||||
cen ㄘㄣ
|
||||
ceng ㄘㄥ
|
||||
cha ㄔㄚ
|
||||
chai ㄔㄞ
|
||||
chan ㄔㄢ
|
||||
chang ㄔㄤ
|
||||
chao ㄔㄠ
|
||||
che ㄔㄜ
|
||||
chen ㄔㄣ
|
||||
cheng ㄔㄥ
|
||||
chi ㄔ
|
||||
chong ㄔㄨㄥ
|
||||
chou ㄔㄡ
|
||||
chu ㄔㄨ
|
||||
chua ㄔㄨㄚ
|
||||
chuai ㄔㄨㄞ
|
||||
chuan ㄔㄨㄢ
|
||||
chuang ㄔㄨㄤ
|
||||
chui ㄔㄨㄟ
|
||||
chun ㄔㄨㄣ
|
||||
chuo ㄔㄨㄛ
|
||||
ci ㄘ
|
||||
cong ㄘㄨㄥ
|
||||
cou ㄘㄡ
|
||||
cu ㄘㄨ
|
||||
cuan ㄘㄨㄢ
|
||||
cui ㄘㄨㄟ
|
||||
cun ㄘㄨㄣ
|
||||
cuo ㄘㄨㄛ
|
||||
da ㄉㄚ
|
||||
dai ㄉㄞ
|
||||
dan ㄉㄢ
|
||||
dang ㄉㄤ
|
||||
dao ㄉㄠ
|
||||
de ㄉㄜ
|
||||
dei ㄉㄟ
|
||||
den ㄉㄣ
|
||||
deng ㄉㄥ
|
||||
di ㄉㄧ
|
||||
dia ㄉㄧㄚ
|
||||
dian ㄉㄧㄢ
|
||||
diang ㄉㄧㄤ
|
||||
diao ㄉㄧㄠ
|
||||
die ㄉㄧㄝ
|
||||
ding ㄉㄧㄥ
|
||||
diu ㄉㄧㄡ
|
||||
dong ㄉㄨㄥ
|
||||
dou ㄉㄡ
|
||||
du ㄉㄨ
|
||||
duan ㄉㄨㄢ
|
||||
dui ㄉㄨㄟ
|
||||
dun ㄉㄨㄣ
|
||||
duo ㄉㄨㄛ
|
||||
e ㄜ
|
||||
ei ㄟ
|
||||
en ㄣ
|
||||
eng ㄥ
|
||||
r ㄦ
|
||||
er ㄦ
|
||||
fa ㄈㄚ
|
||||
fan ㄈㄢ
|
||||
fang ㄈㄤ
|
||||
fei ㄈㄟ
|
||||
fen ㄈㄣ
|
||||
feng ㄈㄥ
|
||||
fo ㄈㄛ
|
||||
fou ㄈㄡ
|
||||
fu ㄈㄨ
|
||||
fiao ㄈㄧㄠ
|
||||
ga ㄍㄚ
|
||||
gai ㄍㄞ
|
||||
gan ㄍㄢ
|
||||
gang ㄍㄤ
|
||||
gao ㄍㄠ
|
||||
ge ㄍㄜ
|
||||
gei ㄍㄟ
|
||||
gen ㄍㄣ
|
||||
geng ㄍㄥ
|
||||
gong ㄍㄨㄥ
|
||||
ging ㄍㄧㄥ
|
||||
gou ㄍㄡ
|
||||
gu ㄍㄨ
|
||||
gua ㄍㄨㄚ
|
||||
guai ㄍㄨㄞ
|
||||
guan ㄍㄨㄢ
|
||||
guang ㄍㄨㄤ
|
||||
gui ㄍㄨㄟ
|
||||
gun ㄍㄨㄣ
|
||||
guo ㄍㄨㄛ
|
||||
ha ㄏㄚ
|
||||
hai ㄏㄞ
|
||||
han ㄏㄢ
|
||||
hang ㄏㄤ
|
||||
hao ㄏㄠ
|
||||
he ㄏㄜ
|
||||
hei ㄏㄟ
|
||||
hen ㄏㄣ
|
||||
heng ㄏㄥ
|
||||
hong ㄏㄨㄥ
|
||||
hou ㄏㄡ
|
||||
hu ㄏㄨ
|
||||
hua ㄏㄨㄚ
|
||||
huai ㄏㄨㄞ
|
||||
huan ㄏㄨㄢ
|
||||
huang ㄏㄨㄤ
|
||||
hui ㄏㄨㄟ
|
||||
hun ㄏㄨㄣ
|
||||
huo ㄏㄨㄛ
|
||||
hue ㄏㄨㄜ
|
||||
ji ㄐㄧ
|
||||
jia ㄐㄧㄚ
|
||||
jian ㄐㄧㄢ
|
||||
jiang ㄐㄧㄤ
|
||||
jiao ㄐㄧㄠ
|
||||
jie ㄐㄧㄝ
|
||||
jin ㄐㄧㄣ
|
||||
jing ㄐㄧㄥ
|
||||
jiong ㄐㄩㄥ
|
||||
jiu ㄐㄧㄡ
|
||||
ju ㄐㄩ
|
||||
juan ㄐㄩㄢ
|
||||
jue ㄐㄩㄝ
|
||||
jun ㄐㄩㄣ
|
||||
ka ㄎㄚ
|
||||
kai ㄎㄞ
|
||||
kan ㄎㄢ
|
||||
kang ㄎㄤ
|
||||
kao ㄎㄠ
|
||||
ke ㄎㄜ
|
||||
kei ㄎㄟ
|
||||
ken ㄎㄣ
|
||||
keng ㄎㄥ
|
||||
kong ㄎㄨㄥ
|
||||
kou ㄎㄡ
|
||||
ku ㄎㄨ
|
||||
kua ㄎㄨㄚ
|
||||
kuai ㄎㄨㄞ
|
||||
kuan ㄎㄨㄢ
|
||||
kuang ㄎㄨㄤ
|
||||
kui ㄎㄨㄟ
|
||||
kun ㄎㄨㄣ
|
||||
kuo ㄎㄨㄛ
|
||||
la ㄌㄚ
|
||||
lai ㄌㄞ
|
||||
lan ㄌㄢ
|
||||
lang ㄌㄤ
|
||||
lao ㄌㄠ
|
||||
le ㄌㄜ
|
||||
lei ㄌㄟ
|
||||
leng ㄌㄥ
|
||||
li ㄌㄧ
|
||||
lia ㄌㄧㄚ
|
||||
lian ㄌㄧㄢ
|
||||
liang ㄌㄧㄤ
|
||||
liao ㄌㄧㄠ
|
||||
lie ㄌㄧㄝ
|
||||
lin ㄌㄧㄣ
|
||||
ling ㄌㄧㄥ
|
||||
liu ㄌㄧㄡ
|
||||
lo ㄌㄛ
|
||||
long ㄌㄨㄥ
|
||||
lou ㄌㄡ
|
||||
lu ㄌㄨ
|
||||
luan ㄌㄨㄢ
|
||||
lun ㄌㄨㄣ
|
||||
luo ㄌㄨㄛ
|
||||
lu: ㄌㄩ
|
||||
lv ㄌㄩ
|
||||
lü ㄌㄩ
|
||||
lu:e ㄌㄩㄝ
|
||||
lve ㄌㄩㄝ
|
||||
lüe ㄌㄩㄝ
|
||||
lu:n ㄌㄩㄣ
|
||||
lvn ㄌㄩㄣ
|
||||
lün ㄌㄩㄣ
|
||||
m ㄇ
|
||||
ma ㄇㄚ
|
||||
mai ㄇㄞ
|
||||
man ㄇㄢ
|
||||
mang ㄇㄤ
|
||||
mao ㄇㄠ
|
||||
me ㄇㄜ
|
||||
mei ㄇㄟ
|
||||
men ㄇㄣ
|
||||
meng ㄇㄥ
|
||||
mi ㄇㄧ
|
||||
mian ㄇㄧㄢ
|
||||
miao ㄇㄧㄠ
|
||||
mie ㄇㄧㄝ
|
||||
min ㄇㄧㄣ
|
||||
ming ㄇㄧㄥ
|
||||
miu ㄇㄧㄡ
|
||||
mo ㄇㄛ
|
||||
mou ㄇㄡ
|
||||
mu ㄇㄨ
|
||||
na ㄋㄚ
|
||||
nai ㄋㄞ
|
||||
nan ㄋㄢ
|
||||
nang ㄋㄤ
|
||||
nao ㄋㄠ
|
||||
ne ㄋㄜ
|
||||
nei ㄋㄟ
|
||||
nen ㄋㄣ
|
||||
neng ㄋㄥ
|
||||
ni ㄋㄧ
|
||||
nia ㄋㄧㄚ
|
||||
nian ㄋㄧㄢ
|
||||
niang ㄋㄧㄤ
|
||||
niao ㄋㄧㄠ
|
||||
nie ㄋㄧㄝ
|
||||
nin ㄋㄧㄣ
|
||||
ning ㄋㄧㄥ
|
||||
niu ㄋㄧㄡ
|
||||
nong ㄋㄨㄥ
|
||||
nou ㄋㄡ
|
||||
nu ㄋㄨ
|
||||
nuan ㄋㄨㄢ
|
||||
nun ㄋㄨㄣ
|
||||
nuo ㄋㄨㄛ
|
||||
nu: ㄋㄩ
|
||||
nv ㄋㄩ
|
||||
nü ㄋㄩ
|
||||
nu:e ㄋㄩㄝ
|
||||
nve ㄋㄩㄝ
|
||||
nüe ㄋㄩㄝ
|
||||
o ㄛ
|
||||
ou ㄡ
|
||||
pa ㄆㄚ
|
||||
pai ㄆㄞ
|
||||
pan ㄆㄢ
|
||||
pang ㄆㄤ
|
||||
pao ㄆㄠ
|
||||
pei ㄆㄟ
|
||||
pen ㄆㄣ
|
||||
peng ㄆㄥ
|
||||
pi ㄆㄧ
|
||||
pian ㄆㄧㄢ
|
||||
piao ㄆㄧㄠ
|
||||
pie ㄆㄧㄝ
|
||||
pin ㄆㄧㄣ
|
||||
ping ㄆㄧㄥ
|
||||
po ㄆㄛ
|
||||
pou ㄆㄡ
|
||||
pu ㄆㄨ
|
||||
qi ㄑㄧ
|
||||
qia ㄑㄧㄚ
|
||||
qian ㄑㄧㄢ
|
||||
qiang ㄑㄧㄤ
|
||||
qiao ㄑㄧㄠ
|
||||
qie ㄑㄧㄝ
|
||||
qin ㄑㄧㄣ
|
||||
qing ㄑㄧㄥ
|
||||
qiong ㄑㄩㄥ
|
||||
qiu ㄑㄧㄡ
|
||||
qu ㄑㄩ
|
||||
quan ㄑㄩㄢ
|
||||
que ㄑㄩㄝ
|
||||
qun ㄑㄩㄣ
|
||||
ran ㄖㄢ
|
||||
rang ㄖㄤ
|
||||
rao ㄖㄠ
|
||||
re ㄖㄜ
|
||||
ren ㄖㄣ
|
||||
reng ㄖㄥ
|
||||
ri ㄖ
|
||||
rong ㄖㄨㄥ
|
||||
rou ㄖㄡ
|
||||
ru ㄖㄨ
|
||||
ruan ㄖㄨㄢ
|
||||
rui ㄖㄨㄟ
|
||||
run ㄖㄨㄣ
|
||||
ruo ㄖㄨㄛ
|
||||
sa ㄙㄚ
|
||||
sai ㄙㄞ
|
||||
san ㄙㄢ
|
||||
sang ㄙㄤ
|
||||
sao ㄙㄠ
|
||||
se ㄙㄜ
|
||||
sei ㄙㄟ
|
||||
sen ㄙㄣ
|
||||
seng ㄙㄥ
|
||||
sha ㄕㄚ
|
||||
shai ㄕㄞ
|
||||
shan ㄕㄢ
|
||||
shang ㄕㄤ
|
||||
shao ㄕㄠ
|
||||
she ㄕㄜ
|
||||
shei ㄕㄟ
|
||||
shen ㄕㄣ
|
||||
sheng ㄕㄥ
|
||||
shi ㄕ
|
||||
shong ㄕㄨㄥ
|
||||
shou ㄕㄡ
|
||||
shu ㄕㄨ
|
||||
shua ㄕㄨㄚ
|
||||
shuai ㄕㄨㄞ
|
||||
shuan ㄕㄨㄢ
|
||||
shuang ㄕㄨㄤ
|
||||
shui ㄕㄨㄟ
|
||||
shun ㄕㄨㄣ
|
||||
shuo ㄕㄨㄛ
|
||||
si ㄙ
|
||||
song ㄙㄨㄥ
|
||||
sou ㄙㄡ
|
||||
su ㄙㄨ
|
||||
suan ㄙㄨㄢ
|
||||
sui ㄙㄨㄟ
|
||||
sun ㄙㄨㄣ
|
||||
suo ㄙㄨㄛ
|
||||
ta ㄊㄚ
|
||||
tai ㄊㄞ
|
||||
tan ㄊㄢ
|
||||
tang ㄊㄤ
|
||||
tao ㄊㄠ
|
||||
te ㄊㄜ
|
||||
tei ㄊㄟ
|
||||
teng ㄊㄥ
|
||||
ti ㄊㄧ
|
||||
tian ㄊㄧㄢ
|
||||
tiao ㄊㄧㄠ
|
||||
tie ㄊㄧㄝ
|
||||
ting ㄊㄧㄥ
|
||||
tong ㄊㄨㄥ
|
||||
tou ㄊㄡ
|
||||
tu ㄊㄨ
|
||||
tuan ㄊㄨㄢ
|
||||
tui ㄊㄨㄟ
|
||||
tun ㄊㄨㄣ
|
||||
tuo ㄊㄨㄛ
|
||||
wa ㄨㄚ
|
||||
wai ㄨㄞ
|
||||
wan ㄨㄢ
|
||||
wang ㄨㄤ
|
||||
wei ㄨㄟ
|
||||
wen ㄨㄣ
|
||||
weng ㄨㄥ
|
||||
wo ㄨㄛ
|
||||
wu ㄨ
|
||||
xi ㄒㄧ
|
||||
xia ㄒㄧㄚ
|
||||
xian ㄒㄧㄢ
|
||||
xiang ㄒㄧㄤ
|
||||
xiao ㄒㄧㄠ
|
||||
xie ㄒㄧㄝ
|
||||
xin ㄒㄧㄣ
|
||||
xing ㄒㄧㄥ
|
||||
xiong ㄒㄩㄥ
|
||||
xiu ㄒㄧㄡ
|
||||
xu ㄒㄩ
|
||||
xuan ㄒㄩㄢ
|
||||
xue ㄒㄩㄝ
|
||||
xun ㄒㄩㄣ
|
||||
ya ㄧㄚ
|
||||
yan ㄧㄢ
|
||||
yang ㄧㄤ
|
||||
yao ㄧㄠ
|
||||
ye ㄧㄝ
|
||||
yi ㄧ
|
||||
yin ㄧㄣ
|
||||
ying ㄧㄥ
|
||||
yo ㄧㄛ
|
||||
yong ㄩㄥ
|
||||
you ㄧㄡ
|
||||
yu ㄩ
|
||||
yuan ㄩㄢ
|
||||
yue ㄩㄝ
|
||||
yun ㄩㄣ
|
||||
za ㄗㄚ
|
||||
zai ㄗㄞ
|
||||
zan ㄗㄢ
|
||||
zang ㄗㄤ
|
||||
zao ㄗㄠ
|
||||
ze ㄗㄜ
|
||||
zei ㄗㄟ
|
||||
zen ㄗㄣ
|
||||
zeng ㄗㄥ
|
||||
zha ㄓㄚ
|
||||
zhai ㄓㄞ
|
||||
zhan ㄓㄢ
|
||||
zhang ㄓㄤ
|
||||
zhao ㄓㄠ
|
||||
zhe ㄓㄜ
|
||||
zhei ㄓㄟ
|
||||
zhen ㄓㄣ
|
||||
zheng ㄓㄥ
|
||||
zhi ㄓ
|
||||
zhong ㄓㄨㄥ
|
||||
zhou ㄓㄡ
|
||||
zhu ㄓㄨ
|
||||
zhua ㄓㄨㄚ
|
||||
zhuai ㄓㄨㄞ
|
||||
zhuan ㄓㄨㄢ
|
||||
zhuang ㄓㄨㄤ
|
||||
zhui ㄓㄨㄟ
|
||||
zhun ㄓㄨㄣ
|
||||
zhuo ㄓㄨㄛ
|
||||
zi ㄗ
|
||||
zong ㄗㄨㄥ
|
||||
zou ㄗㄡ
|
||||
zu ㄗㄨ
|
||||
zuan ㄗㄨㄢ
|
||||
zui ㄗㄨㄟ
|
||||
zun ㄗㄨㄣ
|
||||
zuo ㄗㄨㄛ
|
|
@ -0,0 +1,104 @@
|
||||
package com.marvinelsen.chinese.transliteration
|
||||
|
||||
import io.kotest.core.spec.style.ShouldSpec
|
||||
import io.kotest.datatest.withData
|
||||
import io.kotest.matchers.shouldBe
|
||||
|
||||
/**
 * Data-driven specification for [Syllable]: parsing from numbered pinyin and from zhuyin, and
 * formatting into every [TransliterationSystem].
 *
 * Each `withData` table lists `input to expected` pairs; `nameFn` builds the name of the
 * generated test case from the pair.
 */
class SyllableTest : ShouldSpec({
    context("from pinyin with tone numbers") {
        withData(
            nameFn = { (input, expected) -> "'$input' -> $expected" },
            "sheng1" to Syllable("sheng", Tone.FIRST),
            "zhi2" to Syllable("zhi", Tone.SECOND),
            "ka3" to Syllable("ka", Tone.THIRD),
            "yao4" to Syllable("yao", Tone.FORTH),
            "me5" to Syllable("me", Tone.FIFTH),
            // Letter case of the input is expected to be kept.
            "Me5" to Syllable("Me", Tone.FIFTH),
            // All three spellings of "ü" are expected to be kept as written.
            "nv3" to Syllable("nv", Tone.THIRD),
            "nü3" to Syllable("nü", Tone.THIRD),
            "nu:3" to Syllable("nu:", Tone.THIRD),
        ) { (numberedPinyin, expected) ->
            Syllable.fromPinyinWithToneNumber(numberedPinyin) shouldBe expected
        }
    }

    context("from zhuyin") {
        withData(
            nameFn = { (input, expected) -> "'$input' -> $expected" },
            // No tone mark: expected to parse as first tone.
            "ㄕㄥ" to Syllable("sheng", Tone.FIRST),
            "ㄓˊ" to Syllable("zhi", Tone.SECOND),
            "ㄎㄚˇ" to Syllable("ka", Tone.THIRD),
            "ㄧㄠˋ" to Syllable("yao", Tone.FORTH),
            // The fifth-tone dot is accepted both after and before the syllable.
            "ㄇㄜ˙" to Syllable("me", Tone.FIFTH),
            "˙ㄇㄜ" to Syllable("me", Tone.FIFTH),
            "ㄋㄩˇ" to Syllable("nü", Tone.THIRD),
        ) { (zhuyinInput, expected) ->
            Syllable.fromZhuyin(zhuyinInput) shouldBe expected
        }
    }

    // Placeholder: no cases defined yet.
    context("from invalid pinyin with tone numbers") {
    }

    // Placeholder: no cases defined yet.
    context("from invalid zhuyin") {
    }

    context("format to zhuyin") {
        withData(
            nameFn = { (input, expected) ->
                "${input.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)} -> $expected"
            },
            Syllable("sheng", Tone.FIRST) to "ㄕㄥ",
            Syllable("zhi", Tone.SECOND) to "ㄓˊ",
            Syllable("ka", Tone.THIRD) to "ㄎㄚˇ",
            Syllable("yao", Tone.FORTH) to "ㄧㄠˋ",
            // The fifth-tone dot is expected in front of the syllable when formatting.
            Syllable("me", Tone.FIFTH) to "˙ㄇㄜ",
            // Every spelling of "ü" is expected to produce the same zhuyin.
            Syllable("nü", Tone.THIRD) to "ㄋㄩˇ",
            Syllable("nu:", Tone.THIRD) to "ㄋㄩˇ",
            Syllable("nv", Tone.THIRD) to "ㄋㄩˇ",
        ) { (input, expectedZhuyin) ->
            input.format(TransliterationSystem.ZHUYIN) shouldBe expectedZhuyin
        }
    }

    context("format to pinyin with tone numbers") {
        withData(
            nameFn = { (input, expected) ->
                "${input.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)} -> $expected"
            },
            Syllable("sheng", Tone.FIRST) to "sheng1",
            Syllable("zhi", Tone.SECOND) to "zhi2",
            Syllable("ka", Tone.THIRD) to "ka3",
            Syllable("yao", Tone.FORTH) to "yao4",
            Syllable("me", Tone.FIFTH) to "me5",
            Syllable("nü", Tone.THIRD) to "nü3",
            Syllable("nu:", Tone.THIRD) to "nu:3",
            Syllable("nv", Tone.THIRD) to "nv3",
        ) { (input, expectedNumberedPinyin) ->
            input.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS) shouldBe expectedNumberedPinyin
        }
    }

    context("format to pinyin with tone marks") {
        withData(
            nameFn = { (input, expected) ->
                "${input.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS)} -> $expected"
            },
            Syllable("sheng", Tone.FIRST) to "shēng",
            Syllable("zhi", Tone.SECOND) to "zhí",
            Syllable("ka", Tone.THIRD) to "kǎ",
            Syllable("yao", Tone.FORTH) to "yào",
            Syllable("me", Tone.FIFTH) to "me",
            // "ui" and "iu" finals: the mark is expected on the second vowel.
            Syllable("zhui", Tone.FIRST) to "zhuī",
            Syllable("liu", Tone.FIRST) to "liū",
            Syllable("nü", Tone.THIRD) to "nǚ",
            Syllable("nu:", Tone.THIRD) to "nǚ",
            Syllable("nv", Tone.THIRD) to "nǚ",
        ) { (input, expectedMarkedPinyin) ->
            input.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS) shouldBe expectedMarkedPinyin
        }
    }

    // Placeholder: no cases defined yet.
    context("format to zhuyin with invalid pinyin syllable") {
    }

    // Placeholder: no cases defined yet.
    context("format to pinyin with tone diacritics with invalid pinyin syllable") {
    }

    // Placeholder: no cases defined yet.
    context("format to pinyin with tone numbers with invalid pinyin syllable") {
    }
})
|
@ -0,0 +1,187 @@
|
||||
package com.marvinelsen.chinese.transliteration
|
||||
|
||||
import io.kotest.assertions.throwables.shouldThrow
|
||||
import io.kotest.core.spec.style.ShouldSpec
|
||||
import io.kotest.datatest.withData
|
||||
import io.kotest.matchers.nulls.shouldBeNull
|
||||
import io.kotest.matchers.shouldBe
|
||||
|
||||
/**
 * Data-driven specification for [Tone]: conversion from digits, integers and zhuyin tone marks,
 * conversion back to integers, and formatting into every [TransliterationSystem].
 *
 * Each conversion is covered three ways: valid input, the `...OrNull` variant on invalid input
 * (expects `null`), and the throwing variant on invalid input (expects
 * [IllegalArgumentException]). `nameFn` builds the name of each generated test case.
 */
class ToneTest : ShouldSpec({
    context("convert correctly from digit") {
        withData(
            nameFn = { "'${it.first}' -> ${it.second}" },
            '1' to Tone.FIRST,
            '2' to Tone.SECOND,
            '3' to Tone.THIRD,
            '4' to Tone.FORTH,
            '5' to Tone.FIFTH,
        ) { (digit, expectedTone) ->
            Tone.fromDigit(digit) shouldBe expectedTone
        }
    }

    context("convert correctly from Int") {
        withData(
            nameFn = { "'${it.first}' -> ${it.second}" },
            1 to Tone.FIRST,
            2 to Tone.SECOND,
            3 to Tone.THIRD,
            4 to Tone.FORTH,
            5 to Tone.FIFTH,
        ) { (number, expectedTone) ->
            Tone.fromInt(number) shouldBe expectedTone
        }
    }

    // There is no row for Tone.FIRST: formatting it to zhuyin yields an empty string (see
    // "format to Zhuyin correctly" below), so no mark character is listed for it.
    context("convert correctly from Zhuyin tone mark") {
        withData(
            nameFn = { "'${it.first}' -> ${it.second}" },
            'ˊ' to Tone.SECOND,
            'ˇ' to Tone.THIRD,
            'ˋ' to Tone.FORTH,
            '˙' to Tone.FIFTH,
        ) { (zhuyinToneMark, expectedTone) ->
            Tone.fromZhuyinToneMark(zhuyinToneMark) shouldBe expectedTone
        }
    }

    context("convert correctly to Int") {
        withData(
            nameFn = { "${it.first} -> '${it.second}'" },
            Tone.FIRST to 1,
            Tone.SECOND to 2,
            Tone.THIRD to 3,
            Tone.FORTH to 4,
            Tone.FIFTH to 5,
        ) { (tone, expectedInteger) ->
            tone.toInt() shouldBe expectedInteger
        }
    }

    context("return null when converting from invalid digit ") {
        withData(
            nameFn = { "'$it' -> null" },
            '0',
            '6',
            'a',
            'z',
            '$',
            '*',
        ) { invalidDigit ->
            Tone.fromDigitOrNull(invalidDigit).shouldBeNull()
        }
    }

    context("return null when converting from invalid int ") {
        withData(
            // The case label states the asserted outcome: null, not an exception.
            nameFn = { "'$it' -> null" },
            0,
            6,
            -1,
            Int.MAX_VALUE,
            Int.MIN_VALUE,
        ) { invalidNumber ->
            Tone.fromIntOrNull(invalidNumber).shouldBeNull()
        }
    }

    context("return null when converting from invalid Zhuyin tone mark ") {
        withData(
            // The case label states the asserted outcome: null, not an exception.
            nameFn = { "'$it' -> null" },
            '0',
            '6',
            'a',
            'z',
            '$',
            '*',
        ) { invalidZhuyinToneMark ->
            Tone.fromZhuyinToneMarkOrNull(invalidZhuyinToneMark).shouldBeNull()
        }
    }

    context("throw exception when converting from invalid digit ") {
        withData(
            nameFn = { "'$it' -> throws exception" },
            '0',
            '6',
            'a',
            'z',
            '$',
            '*',
        ) { invalidDigit ->
            shouldThrow<IllegalArgumentException> {
                Tone.fromDigit(invalidDigit)
            }
        }
    }

    context("throw exception when converting from invalid int ") {
        withData(
            nameFn = { "'$it' -> throws exception" },
            0,
            6,
            -1,
            Int.MAX_VALUE,
            Int.MIN_VALUE,
        ) { invalidNumber ->
            shouldThrow<IllegalArgumentException> {
                Tone.fromInt(invalidNumber)
            }
        }
    }

    context("throw exception when converting from invalid Zhuyin tone mark ") {
        withData(
            nameFn = { "'$it' -> throws exception" },
            '0',
            '6',
            'a',
            'z',
            '$',
            '*',
        ) { invalidZhuyinToneMark ->
            shouldThrow<IllegalArgumentException> {
                Tone.fromZhuyinToneMark(invalidZhuyinToneMark)
            }
        }
    }

    context("format to Zhuyin correctly") {
        withData(
            nameFn = { "${it.first} -> '${it.second}'" },
            // First tone carries no mark in zhuyin output.
            Tone.FIRST to "",
            Tone.SECOND to "ˊ",
            Tone.THIRD to "ˇ",
            Tone.FORTH to "ˋ",
            Tone.FIFTH to "˙",
        ) { (tone, expectedZhuyinToneMark) ->
            tone.format(TransliterationSystem.ZHUYIN) shouldBe expectedZhuyinToneMark
        }
    }

    context("format to Pinyin with tone numbers correctly") {
        withData(
            nameFn = { "${it.first} -> '${it.second}'" },
            Tone.FIRST to "1",
            Tone.SECOND to "2",
            Tone.THIRD to "3",
            Tone.FORTH to "4",
            Tone.FIFTH to "5",
        ) { (tone, expectedNumber) ->
            tone.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS) shouldBe expectedNumber
        }
    }

    context("format to Pinyin with tone marks correctly") {
        withData(
            nameFn = { "${it.first} -> '${it.second}'" },
            Tone.FIRST to "\u0304", // combining macron
            Tone.SECOND to "\u0301", // combining acute accent
            Tone.THIRD to "\u030C", // combining caron
            Tone.FORTH to "\u0300", // combining grave accent
            Tone.FIFTH to "", // fifth tone carries no diacritic
        ) { (tone, expectedAccent) ->
            tone.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS) shouldBe expectedAccent
        }
    }
})
|
Loading…
Reference in New Issue
Block a user