commit d55a11d67a3be794f09da33b61060186fe3c5650 Author: Marvin Elsen Date: Wed Oct 9 19:40:04 2024 +0200 Initial commit diff --git a/.gitea/workflows/pull-request.yaml b/.gitea/workflows/pull-request.yaml new file mode 100644 index 0000000..8be02ff --- /dev/null +++ b/.gitea/workflows/pull-request.yaml @@ -0,0 +1,26 @@ +name: Pull Request + +on: + pull_request: + branches: + - main + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout sources + uses: actions/checkout@v4 + - name: Setup Java + uses: actions/setup-java@v4 + with: + distribution: 'temurin' + java-version: 21 + - name: Setup Gradle + uses: gradle/actions/setup-gradle@v4 + - name: Lint + run: ./gradlew detekt + - name: Build + run: ./gradlew build testClasses -x check + - name: Test + run: ./gradlew test \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5b33b62 --- /dev/null +++ b/.gitignore @@ -0,0 +1,42 @@ +.gradle +build/ +!gradle/wrapper/gradle-wrapper.jar +!**/src/main/**/build/ +!**/src/test/**/build/ + +### IntelliJ IDEA ### +.idea +*.iws +*.iml +*.ipr +out/ +!**/src/main/**/out/ +!**/src/test/**/out/ + +### Kotlin ### +.kotlin + +### Eclipse ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache +bin/ +!**/src/main/**/bin/ +!**/src/test/**/bin/ + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ + +### VS Code ### +.vscode/ + +### Mac OS ### +.DS_Store \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..6c2803c --- /dev/null +++ b/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2024 Marvin Elsen + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..567d8fb --- /dev/null +++ b/README.md @@ -0,0 +1,28 @@ +# Willow Database + +Project to create the database for the Willow Chinese dictionary. + +## Build + +To build the project locally, simply run the following command from the terminal: + +```sh +./gradlew build +``` + +## License + +All source code in this repository is licensed under a [MIT license](LICENSE), unless otherwise noted. + +To the following third-party code, data, and files in the repository different licenses apply: + +### Cross-straits (兩岸詞典) Dictionary + +[Cross-Straits Dictionary (兩岸詞典)](https://github.com/g0v/moedict-data-csld) provided by +the [中華文化總會](https://www.gacc.org.tw) is licensed under +a [CC BY-NC-ND 3.0 TW](https://creativecommons.org/licenses/by-nc-nd/3.0/tw/). + +### CC-CEDICT + +[CC-CEDICT](https://cc-cedict.org/wiki) is licensed under +a [Creative Commons Attribution-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-sa/4.0/). diff --git a/build.gradle.kts b/build.gradle.kts new file mode 100644 index 0000000..5902c9f --- /dev/null +++ b/build.gradle.kts @@ -0,0 +1,44 @@ +plugins { + alias(libs.plugins.kotlin.jvm) + alias(libs.plugins.kotlin.serialization) + alias(libs.plugins.detekt) +} + +group = "com.marvinelsen" +version = "1.0.0" + +repositories { + mavenCentral() + maven { + url = uri("https://gitea.marvinelsen.com/api/packages/marvinelsen/maven") + } +} + +dependencies { + detektPlugins(libs.detekt.formatting) + + implementation(libs.chinese.transliteration) + implementation(libs.cedict.parser) + implementation(libs.cross.straits.parser) + + implementation(libs.sqlite.jdbc) + + implementation(libs.kotlinx.serialization.json) + + testImplementation(libs.kotest.core) + testImplementation(libs.kotest.assertions) +} + +tasks.test { + useJUnitPlatform() +} + +kotlin { + jvmToolchain(21) +} + +detekt { + buildUponDefaultConfig = true + allRules = false + autoCorrect = true +} diff --git a/gradle.properties b/gradle.properties new file mode 100644 index 0000000..7fc6f1f --- /dev/null +++ b/gradle.properties @@ -0,0 +1 @@ +kotlin.code.style=official diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml new file mode 100644 index 0000000..f755e9f --- /dev/null +++ b/gradle/libs.versions.toml @@ -0,0 +1,38 @@ +[versions] +kotlin = "2.0.20" +kotest = "5.9.1" +detekt = "1.23.7" +kotlinx-serialization-json = "1.7.1" + +chinese-transliteration = "2.0.0" +cedict-parser = "3.0.0" +cross-straits-parser = "1.0.1" + +sqlite-jdbc = "3.46.0.1" + +[libraries] +chinese-transliteration = { module = "com.marvinelsen:chinese-transliteration", version.ref = "chinese-transliteration" } +cedict-parser = { module = "com.marvinelsen:cedict-parser", version.ref = "cedict-parser" } +cross-straits-parser = { module = "com.marvinelsen:cross-straits-parser", version.ref = "cross-straits-parser" } + +sqlite-jdbc = { module = "org.xerial:sqlite-jdbc", version.ref = "sqlite-jdbc" } + +kotlinx-serialization-json = { module = "org.jetbrains.kotlinx:kotlinx-serialization-json", version.ref = "kotlinx-serialization-json" } + +# Kotest +# See: https://kotest.io +kotest-core = { module = "io.kotest:kotest-runner-junit5", version.ref = "kotest" } +kotest-assertions = { module = "io.kotest:kotest-assertions-core", version.ref = "kotest" } + +# Detekt +# See: https://detekt.dev +detekt-formatting = { module = "io.gitlab.arturbosch.detekt:detekt-formatting", version.ref = "detekt" } + +[plugins] +# Kotlin +# See: https://plugins.gradle.org/plugin/org.jetbrains.kotlin.jvm +kotlin-jvm = { id = "org.jetbrains.kotlin.jvm", version.ref = "kotlin" } +kotlin-serialization = { id = "org.jetbrains.kotlin.plugin.serialization", version.ref = "kotlin" } +# Detekt +# See: https://detekt.dev +detekt = { id = "io.gitlab.arturbosch.detekt", version.ref = "detekt" } \ No newline at end of file diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000..a4b76b9 Binary files /dev/null and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..9355b41 --- /dev/null +++ b/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,7 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.10-bin.zip +networkTimeout=10000 +validateDistributionUrl=true +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew new file mode 100755 index 0000000..f5feea6 --- /dev/null +++ b/gradlew @@ -0,0 +1,252 @@ +#!/bin/sh + +# +# Copyright © 2015-2021 the original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +# This is normally unused +# shellcheck disable=SC2034 +APP_BASE_NAME=${0##*/} +# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) +APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s +' "$PWD" ) || exit + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. + +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command: +# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, +# and any embedded shellness will be escaped. +# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be +# treated as '${Hostname}' itself on the command line. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. +# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 0000000..9d21a21 --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,94 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem +@rem SPDX-License-Identifier: Apache-2.0 +@rem + +@if "%DEBUG%"=="" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%"=="" set DIRNAME=. +@rem This is normally unused +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if %ERRORLEVEL% equ 0 goto execute + +echo. 1>&2 +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 1>&2 + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. 1>&2 +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 1>&2 + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if %ERRORLEVEL% equ 0 goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/settings.gradle.kts b/settings.gradle.kts new file mode 100644 index 0000000..a5c32f5 --- /dev/null +++ b/settings.gradle.kts @@ -0,0 +1 @@ +rootProject.name = "willow-database" diff --git a/src/main/kotlin/com/marvinelsen/willow/database/CreateDatabase.kt b/src/main/kotlin/com/marvinelsen/willow/database/CreateDatabase.kt new file mode 100644 index 0000000..a22cee4 --- /dev/null +++ b/src/main/kotlin/com/marvinelsen/willow/database/CreateDatabase.kt @@ -0,0 +1,245 @@ +@file:Suppress("MagicNumber", "LongMethod", "MaximumLineLength", "MaxLineLength") + +package com.marvinelsen.willow.database + +import com.marvinelsen.cedict.api.CedictParser +import com.marvinelsen.chinese.transliteration.api.PinyinSyllable +import com.marvinelsen.chinese.transliteration.api.TransliterationSystem +import com.marvinelsen.chinese.transliteration.api.Zhuyin +import com.marvinelsen.crossstraits.api.CrossStraitsDefinition +import com.marvinelsen.crossstraits.api.CrossStraitsParser +import kotlinx.serialization.builtins.ListSerializer +import kotlinx.serialization.builtins.serializer +import kotlinx.serialization.json.Json +import java.sql.Connection +import java.sql.DriverManager +import java.sql.ResultSet +import java.util.zip.GZIPInputStream + +const val JDBC_CONNECTION_STRING = "jdbc:sqlite:dictionary.db" + +fun main() { + val connection = DriverManager.getConnection(JDBC_CONNECTION_STRING).apply { + autoCommit = false + } + + val statement = connection.createStatement() + statement.executeUpdate( + """ + CREATE TABLE IF NOT EXISTS entry( + id INTEGER PRIMARY KEY, + traditional TEXT NOT NULL, + simplified TEXT NOT NULL, + pinyin_with_tone_marks TEXT NOT NULL, + pinyin_with_tone_numbers TEXT NOT NULL, + zhuyin TEXT NOT NULL, + searchable_pinyin TEXT NOT NULL, + searchable_pinyin_with_tone_numbers TEXT NOT NULL, + cedict_definitions JSON NOT NULL, + cross_straits_definitions JSON NOT NULL, + moe_definitions JSON NOT NULL, + character_count INTEGER NOT NULL, + CONSTRAINT character_count_gte CHECK(character_count > 0) + ); + """.trimIndent() + ) + statement.executeUpdate("CREATE INDEX IF NOT EXISTS idx_entry_traditional ON entry (traditional)") + statement.executeUpdate("CREATE INDEX IF NOT EXISTS idx_entry_simplified ON entry (simplified)") + statement.executeUpdate("CREATE INDEX IF NOT EXISTS idx_entry_character_count ON entry (character_count)") + statement.executeUpdate("CREATE INDEX IF NOT EXISTS idx_entry_searchable_pinyin ON entry (searchable_pinyin)") + statement.executeUpdate( + "CREATE INDEX IF NOT EXISTS idx_entry_searchable_pinyin_with_tone_numbers ON entry (searchable_pinyin_with_tone_numbers)" + ) + + val cedictParser = CedictParser.instance + val cedictEntries = + cedictParser.parseCedict( + GZIPInputStream(object {}.javaClass.getResourceAsStream("/cedict_1_0_ts_utf-8_mdbg.txt.gz")!!) + ) + + val insertStatement = + connection.prepareStatement( + "INSERT OR IGNORE INTO entry(traditional, simplified, pinyin_with_tone_marks, pinyin_with_tone_numbers, zhuyin, searchable_pinyin, searchable_pinyin_with_tone_numbers, cedict_definitions, cross_straits_definitions, moe_definitions, character_count) VALUES(?,?,?,?,?,?,?,?,?,?,?)" + ) + for (entry in cedictEntries) { + try { + insertStatement.setString(1, entry.traditional) + insertStatement.setString(2, entry.simplified) + insertStatement.setString( + 3, + entry.pinyinSyllables.joinToString( + separator = " " + ) { it.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS) } + ) + insertStatement.setString( + 4, + entry.pinyinSyllables.joinToString( + separator = " " + ) { it.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS) } + ) + insertStatement.setString( + 5, + entry.pinyinSyllables.joinToString( + separator = Zhuyin.SEPARATOR + ) { it.format(TransliterationSystem.ZHUYIN) } + ) + insertStatement.setString( + 6, + entry.pinyinSyllables.joinToString( + separator = "" + ) { + it.toSearchablePinyin() + } + ) + insertStatement.setString( + 7, + entry.pinyinSyllables.joinToString( + separator = "" + ) { + it.toSearchablePinyinWithToneNumbers() + } + ) + insertStatement.setString( + 8, + Json.encodeToString( + ListSerializer(ListSerializer(String.serializer())), + entry.definitions.map { it.glosses } + ) + ) + insertStatement.setString( + 9, + Json.encodeToString( + ListSerializer(String.serializer()), + emptyList() + ) + ) + insertStatement.setString( + 10, + Json.encodeToString( + ListSerializer(String.serializer()), + emptyList() + ) + ) + insertStatement.setInt(11, entry.traditional.length) + } catch (_: Exception) { + // no-op + } + + insertStatement.addBatch() + } + + insertStatement.executeBatch() + connection.commit() + insertStatement.close() + statement.close() + + createCrossStraitsEntries(connection) + + connection.close() +} + +private fun createCrossStraitsEntries(connection: Connection) { + val insertStatement = + connection.prepareStatement( + "INSERT OR IGNORE INTO entry(traditional, simplified, pinyin_with_tone_marks, pinyin_with_tone_numbers, zhuyin, searchable_pinyin, searchable_pinyin_with_tone_numbers, cedict_definitions, cross_straits_definitions, moe_definitions, character_count) VALUES(?,?,?,?,?,?,?,?,?,?,?)" + ) + + val updateStatement = connection.prepareStatement( + "UPDATE entry SET cross_straits_definitions = ? WHERE id = ?" + ) + val statement = connection.createStatement() + + val crossStraitsEntries = + CrossStraitsParser.instance.parse( + GZIPInputStream(object {}.javaClass.getResourceAsStream("/cross-straits.csv.gz")!!) + ) + for (entry in crossStraitsEntries) { + val pronunciation = entry.pronunciationTaiwan.ifEmpty { entry.pronunciationMainland } + val rs: ResultSet = statement.executeQuery( + "SELECT id FROM entry WHERE traditional = '${entry.traditional}' AND pinyin_with_tone_numbers = '${ + pronunciation.joinToString( + separator = " " + ) { it.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS) } + }'" + ) + if (rs.next()) { + updateStatement.setString( + 1, + Json.encodeToString(ListSerializer(CrossStraitsDefinition.serializer()), entry.definitions) + ) + updateStatement.setInt(2, rs.getInt(1)) + updateStatement.executeUpdate() + } else { + try { + insertStatement.setString(1, entry.traditional) + insertStatement.setString(2, entry.simplified) + insertStatement.setString( + 3, + pronunciation.joinToString( + separator = " " + ) { it.format(TransliterationSystem.PINYIN_WITH_TONE_MARKS) } + ) + insertStatement.setString( + 4, + pronunciation.joinToString( + separator = " " + ) { it.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS) } + ) + insertStatement.setString( + 5, + pronunciation.joinToString(separator = Zhuyin.SEPARATOR) { it.format(TransliterationSystem.ZHUYIN) } + ) + insertStatement.setString( + 6, + pronunciation.joinToString(separator = "") { + it.toSearchablePinyin() + } + ) + insertStatement.setString( + 7, + pronunciation.joinToString(separator = "") { + it.toSearchablePinyinWithToneNumbers() + } + ) + insertStatement.setString( + 8, + Json.encodeToString( + ListSerializer(String.serializer()), + emptyList() + ) + ) + insertStatement.setString( + 9, + Json.encodeToString( + ListSerializer(CrossStraitsDefinition.serializer()), + entry.definitions + ) + ) + insertStatement.setString( + 10, + Json.encodeToString( + ListSerializer(String.serializer()), + emptyList() + ) + ) + insertStatement.setInt(11, entry.traditional.length) + } catch (_: Exception) { + // no-op + } + + insertStatement.addBatch() + } + rs.close() + } + + insertStatement.executeBatch() + connection.commit() + insertStatement.close() + statement.close() +} + +private fun PinyinSyllable.toSearchablePinyinWithToneNumbers() = + this.format(TransliterationSystem.PINYIN_WITH_TONE_NUMBERS).lowercase() + +private fun PinyinSyllable.toSearchablePinyin() = + this.toSearchablePinyinWithToneNumbers().replace("""\d""".toRegex(), "") diff --git a/src/main/resources/LICENSE-CC-CEDICT b/src/main/resources/LICENSE-CC-CEDICT new file mode 100644 index 0000000..2d58298 --- /dev/null +++ b/src/main/resources/LICENSE-CC-CEDICT @@ -0,0 +1,428 @@ +Attribution-ShareAlike 4.0 International + +======================================================================= + +Creative Commons Corporation ("Creative Commons") is not a law firm and +does not provide legal services or legal advice. Distribution of +Creative Commons public licenses does not create a lawyer-client or +other relationship. Creative Commons makes its licenses and related +information available on an "as-is" basis. Creative Commons gives no +warranties regarding its licenses, any material licensed under their +terms and conditions, or any related information. Creative Commons +disclaims all liability for damages resulting from their use to the +fullest extent possible. + +Using Creative Commons Public Licenses + +Creative Commons public licenses provide a standard set of terms and +conditions that creators and other rights holders may use to share +original works of authorship and other material subject to copyright +and certain other rights specified in the public license below. The +following considerations are for informational purposes only, are not +exhaustive, and do not form part of our licenses. + + Considerations for licensors: Our public licenses are + intended for use by those authorized to give the public + permission to use material in ways otherwise restricted by + copyright and certain other rights. Our licenses are + irrevocable. Licensors should read and understand the terms + and conditions of the license they choose before applying it. + Licensors should also secure all rights necessary before + applying our licenses so that the public can reuse the + material as expected. Licensors should clearly mark any + material not subject to the license. This includes other CC- + licensed material, or material used under an exception or + limitation to copyright. More considerations for licensors: + wiki.creativecommons.org/Considerations_for_licensors + + Considerations for the public: By using one of our public + licenses, a licensor grants the public permission to use the + licensed material under specified terms and conditions. If + the licensor's permission is not necessary for any reason--for + example, because of any applicable exception or limitation to + copyright--then that use is not regulated by the license. Our + licenses grant only permissions under copyright and certain + other rights that a licensor has authority to grant. Use of + the licensed material may still be restricted for other + reasons, including because others have copyright or other + rights in the material. A licensor may make special requests, + such as asking that all changes be marked or described. + Although not required by our licenses, you are encouraged to + respect those requests where reasonable. More considerations + for the public: + wiki.creativecommons.org/Considerations_for_licensees + +======================================================================= + +Creative Commons Attribution-ShareAlike 4.0 International Public +License + +By exercising the Licensed Rights (defined below), You accept and agree +to be bound by the terms and conditions of this Creative Commons +Attribution-ShareAlike 4.0 International Public License ("Public +License"). To the extent this Public License may be interpreted as a +contract, You are granted the Licensed Rights in consideration of Your +acceptance of these terms and conditions, and the Licensor grants You +such rights in consideration of benefits the Licensor receives from +making the Licensed Material available under these terms and +conditions. + + +Section 1 -- Definitions. + + a. Adapted Material means material subject to Copyright and Similar + Rights that is derived from or based upon the Licensed Material + and in which the Licensed Material is translated, altered, + arranged, transformed, or otherwise modified in a manner requiring + permission under the Copyright and Similar Rights held by the + Licensor. For purposes of this Public License, where the Licensed + Material is a musical work, performance, or sound recording, + Adapted Material is always produced where the Licensed Material is + synched in timed relation with a moving image. + + b. Adapter's License means the license You apply to Your Copyright + and Similar Rights in Your contributions to Adapted Material in + accordance with the terms and conditions of this Public License. + + c. BY-SA Compatible License means a license listed at + creativecommons.org/compatiblelicenses, approved by Creative + Commons as essentially the equivalent of this Public License. + + d. Copyright and Similar Rights means copyright and/or similar rights + closely related to copyright including, without limitation, + performance, broadcast, sound recording, and Sui Generis Database + Rights, without regard to how the rights are labeled or + categorized. For purposes of this Public License, the rights + specified in Section 2(b)(1)-(2) are not Copyright and Similar + Rights. + + e. Effective Technological Measures means those measures that, in the + absence of proper authority, may not be circumvented under laws + fulfilling obligations under Article 11 of the WIPO Copyright + Treaty adopted on December 20, 1996, and/or similar international + agreements. + + f. Exceptions and Limitations means fair use, fair dealing, and/or + any other exception or limitation to Copyright and Similar Rights + that applies to Your use of the Licensed Material. + + g. License Elements means the license attributes listed in the name + of a Creative Commons Public License. The License Elements of this + Public License are Attribution and ShareAlike. + + h. Licensed Material means the artistic or literary work, database, + or other material to which the Licensor applied this Public + License. + + i. Licensed Rights means the rights granted to You subject to the + terms and conditions of this Public License, which are limited to + all Copyright and Similar Rights that apply to Your use of the + Licensed Material and that the Licensor has authority to license. + + j. Licensor means the individual(s) or entity(ies) granting rights + under this Public License. + + k. Share means to provide material to the public by any means or + process that requires permission under the Licensed Rights, such + as reproduction, public display, public performance, distribution, + dissemination, communication, or importation, and to make material + available to the public including in ways that members of the + public may access the material from a place and at a time + individually chosen by them. + + l. Sui Generis Database Rights means rights other than copyright + resulting from Directive 96/9/EC of the European Parliament and of + the Council of 11 March 1996 on the legal protection of databases, + as amended and/or succeeded, as well as other essentially + equivalent rights anywhere in the world. + + m. You means the individual or entity exercising the Licensed Rights + under this Public License. Your has a corresponding meaning. + + +Section 2 -- Scope. + + a. License grant. + + 1. Subject to the terms and conditions of this Public License, + the Licensor hereby grants You a worldwide, royalty-free, + non-sublicensable, non-exclusive, irrevocable license to + exercise the Licensed Rights in the Licensed Material to: + + a. reproduce and Share the Licensed Material, in whole or + in part; and + + b. produce, reproduce, and Share Adapted Material. + + 2. Exceptions and Limitations. For the avoidance of doubt, where + Exceptions and Limitations apply to Your use, this Public + License does not apply, and You do not need to comply with + its terms and conditions. + + 3. Term. The term of this Public License is specified in Section + 6(a). + + 4. Media and formats; technical modifications allowed. The + Licensor authorizes You to exercise the Licensed Rights in + all media and formats whether now known or hereafter created, + and to make technical modifications necessary to do so. The + Licensor waives and/or agrees not to assert any right or + authority to forbid You from making technical modifications + necessary to exercise the Licensed Rights, including + technical modifications necessary to circumvent Effective + Technological Measures. For purposes of this Public License, + simply making modifications authorized by this Section 2(a) + (4) never produces Adapted Material. + + 5. Downstream recipients. + + a. Offer from the Licensor -- Licensed Material. Every + recipient of the Licensed Material automatically + receives an offer from the Licensor to exercise the + Licensed Rights under the terms and conditions of this + Public License. + + b. Additional offer from the Licensor -- Adapted Material. + Every recipient of Adapted Material from You + automatically receives an offer from the Licensor to + exercise the Licensed Rights in the Adapted Material + under the conditions of the Adapter's License You apply. + + c. No downstream restrictions. You may not offer or impose + any additional or different terms or conditions on, or + apply any Effective Technological Measures to, the + Licensed Material if doing so restricts exercise of the + Licensed Rights by any recipient of the Licensed + Material. + + 6. No endorsement. Nothing in this Public License constitutes or + may be construed as permission to assert or imply that You + are, or that Your use of the Licensed Material is, connected + with, or sponsored, endorsed, or granted official status by, + the Licensor or others designated to receive attribution as + provided in Section 3(a)(1)(A)(i). + + b. Other rights. + + 1. Moral rights, such as the right of integrity, are not + licensed under this Public License, nor are publicity, + privacy, and/or other similar personality rights; however, to + the extent possible, the Licensor waives and/or agrees not to + assert any such rights held by the Licensor to the limited + extent necessary to allow You to exercise the Licensed + Rights, but not otherwise. + + 2. Patent and trademark rights are not licensed under this + Public License. + + 3. To the extent possible, the Licensor waives any right to + collect royalties from You for the exercise of the Licensed + Rights, whether directly or through a collecting society + under any voluntary or waivable statutory or compulsory + licensing scheme. In all other cases the Licensor expressly + reserves any right to collect such royalties. + + +Section 3 -- License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the +following conditions. + + a. Attribution. + + 1. If You Share the Licensed Material (including in modified + form), You must: + + a. retain the following if it is supplied by the Licensor + with the Licensed Material: + + i. identification of the creator(s) of the Licensed + Material and any others designated to receive + attribution, in any reasonable manner requested by + the Licensor (including by pseudonym if + designated); + + ii. a copyright notice; + + iii. a notice that refers to this Public License; + + iv. a notice that refers to the disclaimer of + warranties; + + v. a URI or hyperlink to the Licensed Material to the + extent reasonably practicable; + + b. indicate if You modified the Licensed Material and + retain an indication of any previous modifications; and + + c. indicate the Licensed Material is licensed under this + Public License, and include the text of, or the URI or + hyperlink to, this Public License. + + 2. You may satisfy the conditions in Section 3(a)(1) in any + reasonable manner based on the medium, means, and context in + which You Share the Licensed Material. For example, it may be + reasonable to satisfy the conditions by providing a URI or + hyperlink to a resource that includes the required + information. + + 3. If requested by the Licensor, You must remove any of the + information required by Section 3(a)(1)(A) to the extent + reasonably practicable. + + b. ShareAlike. + + In addition to the conditions in Section 3(a), if You Share + Adapted Material You produce, the following conditions also apply. + + 1. The Adapter's License You apply must be a Creative Commons + license with the same License Elements, this version or + later, or a BY-SA Compatible License. + + 2. You must include the text of, or the URI or hyperlink to, the + Adapter's License You apply. You may satisfy this condition + in any reasonable manner based on the medium, means, and + context in which You Share Adapted Material. + + 3. You may not offer or impose any additional or different terms + or conditions on, or apply any Effective Technological + Measures to, Adapted Material that restrict exercise of the + rights granted under the Adapter's License You apply. + + +Section 4 -- Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that +apply to Your use of the Licensed Material: + + a. for the avoidance of doubt, Section 2(a)(1) grants You the right + to extract, reuse, reproduce, and Share all or a substantial + portion of the contents of the database; + + b. if You include all or a substantial portion of the database + contents in a database in which You have Sui Generis Database + Rights, then the database in which You have Sui Generis Database + Rights (but not its individual contents) is Adapted Material, + including for purposes of Section 3(b); and + + c. You must comply with the conditions in Section 3(a) if You Share + all or a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not +replace Your obligations under this Public License where the Licensed +Rights include other Copyright and Similar Rights. + + +Section 5 -- Disclaimer of Warranties and Limitation of Liability. + + a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE + EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS + AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF + ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, + IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, + WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, + ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT + KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT + ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. + + b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE + TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, + NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, + INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, + COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR + USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN + ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR + DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR + IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. + + c. The disclaimer of warranties and limitation of liability provided + above shall be interpreted in a manner that, to the extent + possible, most closely approximates an absolute disclaimer and + waiver of all liability. + + +Section 6 -- Term and Termination. + + a. This Public License applies for the term of the Copyright and + Similar Rights licensed here. However, if You fail to comply with + this Public License, then Your rights under this Public License + terminate automatically. + + b. Where Your right to use the Licensed Material has terminated under + Section 6(a), it reinstates: + + 1. automatically as of the date the violation is cured, provided + it is cured within 30 days of Your discovery of the + violation; or + + 2. upon express reinstatement by the Licensor. + + For the avoidance of doubt, this Section 6(b) does not affect any + right the Licensor may have to seek remedies for Your violations + of this Public License. + + c. For the avoidance of doubt, the Licensor may also offer the + Licensed Material under separate terms or conditions or stop + distributing the Licensed Material at any time; however, doing so + will not terminate this Public License. + + d. Sections 1, 5, 6, 7, and 8 survive termination of this Public + License. + + +Section 7 -- Other Terms and Conditions. + + a. The Licensor shall not be bound by any additional or different + terms or conditions communicated by You unless expressly agreed. + + b. Any arrangements, understandings, or agreements regarding the + Licensed Material not stated herein are separate from and + independent of the terms and conditions of this Public License. + + +Section 8 -- Interpretation. + + a. For the avoidance of doubt, this Public License does not, and + shall not be interpreted to, reduce, limit, restrict, or impose + conditions on any use of the Licensed Material that could lawfully + be made without permission under this Public License. + + b. To the extent possible, if any provision of this Public License is + deemed unenforceable, it shall be automatically reformed to the + minimum extent necessary to make it enforceable. If the provision + cannot be reformed, it shall be severed from this Public License + without affecting the enforceability of the remaining terms and + conditions. + + c. No term or condition of this Public License will be waived and no + failure to comply consented to unless expressly agreed to by the + Licensor. + + d. Nothing in this Public License constitutes or may be interpreted + as a limitation upon, or waiver of, any privileges and immunities + that apply to the Licensor or You, including from the legal + processes of any jurisdiction or authority. + + +======================================================================= + +Creative Commons is not a party to its public +licenses. Notwithstanding, Creative Commons may elect to apply one of +its public licenses to material it publishes and in those instances +will be considered the “Licensor.” The text of the Creative Commons +public licenses is dedicated to the public domain under the CC0 Public +Domain Dedication. Except for the limited purpose of indicating that +material is shared under a Creative Commons public license or as +otherwise permitted by the Creative Commons policies published at +creativecommons.org/policies, Creative Commons does not authorize the +use of the trademark "Creative Commons" or any other trademark or logo +of Creative Commons without its prior written consent including, +without limitation, in connection with any unauthorized modifications +to any of its public licenses or any other arrangements, +understandings, or agreements concerning use of licensed material. For +the avoidance of doubt, this paragraph does not form part of the +public licenses. + +Creative Commons may be contacted at creativecommons.org. + diff --git a/src/main/resources/LICENSE-CROSS-STRAITS b/src/main/resources/LICENSE-CROSS-STRAITS new file mode 100644 index 0000000..bcc2e0e --- /dev/null +++ b/src/main/resources/LICENSE-CROSS-STRAITS @@ -0,0 +1,308 @@ +Creative Commons Legal Code + +Attribution-NonCommercial-NoDerivs 3.0 Taiwan + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS LICENSE DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE INFORMATION PROVIDED, AND DISCLAIMS LIABILITY FOR + DAMAGES RESULTING FROM ITS USE. + +License + +THE WORK (AS DEFINED BELOW) IS PROVIDED UNDER THE TERMS OF THIS CREATIVE +COMMONS PUBLIC LICENSE ("CCPL" OR "LICENSE"). THE WORK IS PROTECTED BY +COPYRIGHT AND/OR OTHER APPLICABLE LAW. ANY USE OF THE WORK OTHER THAN AS +AUTHORIZED UNDER THIS LICENSE OR COPYRIGHT LAW IS PROHIBITED. + +BY EXERCISING ANY RIGHTS TO THE WORK PROVIDED HERE, YOU ACCEPT AND AGREE +TO BE BOUND BY THE TERMS OF THIS LICENSE. TO THE EXTENT THIS LICENSE MAY +BE CONSIDERED TO BE A CONTRACT, THE LICENSOR GRANTS YOU THE RIGHTS +CONTAINED HERE IN CONSIDERATION OF YOUR ACCEPTANCE OF SUCH TERMS AND +CONDITIONS. + +1. Definitions + + a. "Adaptation" means a work based upon the Work, or upon the Work and + other pre-existing works, such as a translation, adaptation, + derivative work, arrangement of music or other alterations of a + literary or artistic work, or phonogram or performance and includes + cinematographic adaptations or any other form in which the Work may be + recast, transformed, or adapted including in any form recognizably + derived from the original, except that a work that constitutes a + Collection will not be considered an Adaptation for the purpose of + this License. For the avoidance of doubt, where the Work is a musical + work, performance or phonogram, the synchronization of the Work in + timed-relation with a moving image ("synching") will be considered an + Adaptation for the purpose of this License. + b. "Collection" means a collection of literary or artistic works, such as + encyclopedias and anthologies, or performances, phonograms or + broadcasts, or other works or subject matter other than works listed + in Section 1(f) below, which, by reason of the selection and + arrangement of their contents, constitute intellectual creations, in + which the Work is included in its entirety in unmodified form along + with one or more other contributions, each constituting separate and + independent works in themselves, which together are assembled into a + collective whole. A work that constitutes a Collection will not be + considered an Adaptation (as defined above) for the purposes of this + License. + c. "Distribute" means to make available to the public the original and + copies of the Work through sale or other transfer of ownership. + d. "Licensor" means the individual, individuals, entity or entities that + offer(s) the Work under the terms of this License. + e. "Original Author" means, in the case of a literary or artistic work, + the individual, individuals, entity or entities who created the Work + or if no individual or entity can be identified, the publisher; and in + addition (i) in the case of a performance the actors, singers, + musicians, dancers, and other persons who act, sing, deliver, declaim, + play in, interpret or otherwise perform literary or artistic works or + expressions of folklore; (ii) in the case of a phonogram the producer + being the person or legal entity who first fixes the sounds of a + performance or other sounds; and, (iii) in the case of broadcasts, the + organization that transmits the broadcast. + f. "Work" means the literary and/or artistic work offered under the terms + of this License including without limitation any production in the + literary, scientific and artistic domain, whatever may be the mode or + form of its expression including digital form, such as a book, + pamphlet and other writing; a lecture, address, sermon or other work + of the same nature; a dramatic or dramatico-musical work; a + choreographic work or entertainment in dumb show; a musical + composition with or without words; a cinematographic work to which are + assimilated works expressed by a process analogous to cinematography; + a work of drawing, painting, architecture, sculpture, engraving or + lithography; a photographic work to which are assimilated works + expressed by a process analogous to photography; a work of applied + art; an illustration, map, plan, sketch or three-dimensional work + relative to geography, topography, architecture or science; a + performance; a broadcast; a phonogram; a compilation of data to the + extent it is protected as a copyrightable work; or a work performed by + a variety or circus performer to the extent it is not otherwise + considered a literary or artistic work. + g. "You" means an individual or entity exercising rights under this + License who has not previously violated the terms of this License with + respect to the Work, or who has received express permission from the + Licensor to exercise rights under this License despite a previous + violation. + h. "Publicly Perform" means to perform public recitations of the Work and + to communicate to the public those public recitations, by any means or + process, including by wire or wireless means or public digital + performances; to make available to the public Works in such a way that + members of the public may access these Works from a place and at a + place individually chosen by them; to perform the Work to the public + by any means or process and the communication to the public of the + performances of the Work, including by public digital performance; to + broadcast and rebroadcast the Work by any means including signs, + sounds or images. + i. "Reproduce" means to make copies of the Work by any means including + without limitation by sound or visual recordings and the right of + fixation and reproducing fixations of the Work, including storage of a + protected performance or phonogram in digital form or other electronic + medium. + +2. Fair Dealing Rights. Nothing in this License is intended to reduce, +limit, or restrict any uses free from copyright or rights arising from +limitations or exceptions that are provided for in connection with the +copyright protection under copyright law or other applicable laws. + +3. License Grant. Subject to the terms and conditions of this License, +Licensor hereby grants You a worldwide, royalty-free, non-exclusive, +perpetual (for the duration of the applicable copyright) license to +exercise the rights in the Work as stated below: + + a. to Reproduce the Work, to incorporate the Work into one or more + Collections, and to Reproduce the Work as incorporated in the + Collections; and, + b. to Distribute and Publicly Perform the Work including as incorporated + in Collections. + +The above rights may be exercised in all media and formats whether now +known or hereafter devised. The above rights include the right to make +such modifications as are technically necessary to exercise the rights in +other media and formats, but otherwise you have no rights to make +Adaptations. Subject to 8(f), all rights not expressly granted by Licensor +are hereby reserved, including but not limited to the rights set forth in +Section 4(d). + +4. Restrictions. The license granted in Section 3 above is expressly made +subject to and limited by the following restrictions: + + a. You may Distribute or Publicly Perform the Work only under the terms + of this License. You must include a copy of, or the Uniform Resource + Identifier (URI) for, this License with every copy of the Work You + Distribute or Publicly Perform. You may not offer or impose any terms + on the Work that restrict the terms of this License or the ability of + the recipient of the Work to exercise the rights granted to that + recipient under the terms of the License. You may not sublicense the + Work. You must keep intact all notices that refer to this License and + to the disclaimer of warranties with every copy of the Work You + Distribute or Publicly Perform. When You Distribute or Publicly + Perform the Work, You may not impose any effective technological + measures on the Work that restrict the ability of a recipient of the + Work from You to exercise the rights granted to that recipient under + the terms of the License. This Section 4(a) applies to the Work as + incorporated in a Collection, but this does not require the Collection + apart from the Work itself to be made subject to the terms of this + License. If You create a Collection, upon notice from any Licensor You + must, to the extent practicable, remove from the Collection any credit + as required by Section 4(c), as requested. + b. You may not exercise any of the rights granted to You in Section 3 + above in any manner that is primarily intended for or directed toward + commercial advantage or private monetary compensation. The exchange of + the Work for other copyrighted works by means of digital file-sharing + or otherwise shall not be considered to be intended for or directed + toward commercial advantage or private monetary compensation, provided + there is no payment of any monetary compensation in connection with + the exchange of copyrighted works. + c. If You Distribute, or Publicly Perform the Work or Collections, You + must, unless a request has been made pursuant to Section 4(a), keep + intact all copyright notices for the Work and provide, reasonable to + the medium or means You are utilizing: (i) the name of the Original + Author (or pseudonym, if applicable) if supplied, and/or if the + Original Author and/or Licensor designate another party or parties + (e.g., a sponsor institute, publishing entity, journal) for + attribution ("Attribution Parties") in Licensor's copyright notice, + terms of service or by other reasonable means, the name of such party + or parties; (ii) the title of the Work if supplied; (iii) to the + extent reasonably practicable, the URI, if any, that Licensor + specifies to be associated with the Work, unless such URI does not + refer to the copyright notice or licensing information for the Work. + The credit required by this Section 4(c) may be implemented in any + reasonable manner; provided, however, that in the case of a + Collection, at a minimum such credit will appear, if a credit for all + contributing authors of Collection appears, then as part of these + credits and in a manner at least as prominent as the credits for the + other contributing authors. For the avoidance of doubt, You may only + use the credit required by this Section for the purpose of attribution + in the manner set out above and, by exercising Your rights under this + License, You may not implicitly or explicitly assert or imply any + connection with, sponsorship or endorsement by the Original Author, + Licensor and/or Attribution Parties, as appropriate, of You or Your + use of the Work, without the separate, express prior written + permission of the Original Author, Licensor and/or Attribution + Parties. + d. For the avoidance of doubt: + + i. Non-waivable Compulsory License Schemes. In those jurisdictions in + which the right to collect royalties through any statutory or + compulsory licensing scheme cannot be waived, the Licensor + reserves the exclusive right to collect such royalties for any + exercise by You of the rights granted under this License; + ii. Waivable Compulsory License Schemes. In those jurisdictions in + which the right to collect royalties through any statutory or + compulsory licensing scheme can be waived, the Licensor reserves + the exclusive right to collect such royalties for any exercise by + You of the rights granted under this License if Your exercise of + such rights is for a purpose or use which is otherwise than + noncommercial as permitted under Section 4(b) and otherwise waives + the right to collect royalties through any statutory or compulsory + licensing scheme; and, + iii. Voluntary License Schemes. The Licensor reserves the right to + collect royalties, whether individually or, in the event that the + Licensor is a member of a collecting society that administers + voluntary licensing schemes, via that society, from any exercise + by You of the rights granted under this License that is for a + purpose or use which is otherwise than noncommercial as permitted + under Section 4(b). + e. Except as otherwise agreed in writing by the Licensor or as may be + otherwise permitted by applicable law, if You Reproduce, Distribute or + Publicly Perform the Work either by itself or as part of any + Collections, You must not distort, mutilate, modify or take other + derogatory action in relation to the Work which would be prejudicial + to the Original Author's honor or reputation. + +5. Representations, Warranties and Disclaimer + +UNLESS OTHERWISE MUTUALLY AGREED BY THE PARTIES IN WRITING, LICENSOR +OFFERS THE WORK AS-IS AND MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY +KIND CONCERNING THE WORK, EXPRESS, IMPLIED, STATUTORY OR OTHERWISE, +INCLUDING, WITHOUT LIMITATION, WARRANTIES OF TITLE, MERCHANTIBILITY, +FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF +LATENT OR OTHER DEFECTS, ACCURACY, OR THE PRESENCE OF ABSENCE OF ERRORS, +WHETHER OR NOT DISCOVERABLE. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION +OF IMPLIED WARRANTIES, SO SUCH EXCLUSION MAY NOT APPLY TO YOU. + +6. Limitation on Liability. EXCEPT TO THE EXTENT REQUIRED BY APPLICABLE +LAW, IN NO EVENT WILL LICENSOR BE LIABLE TO YOU ON ANY LEGAL THEORY FOR +ANY SPECIAL, INCIDENTAL, CONSEQUENTIAL, PUNITIVE OR EXEMPLARY DAMAGES +ARISING OUT OF THIS LICENSE OR THE USE OF THE WORK, EVEN IF LICENSOR HAS +BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +7. Termination + + a. This License and the rights granted hereunder will terminate + automatically upon any breach by You of the terms of this License. + Individuals or entities who have received Collections from You under + this License, however, will not have their licenses terminated + provided such individuals or entities remain in full compliance with + those licenses. Sections 1, 2, 5, 6, 7, and 8 will survive any + termination of this License. + b. Subject to the above terms and conditions, the license granted here is + perpetual (for the duration of the applicable copyright in the Work). + Notwithstanding the above, Licensor reserves the right to release the + Work under different license terms or to stop distributing the Work at + any time; provided, however that any such election will not serve to + withdraw this License (or any other license that has been, or is + required to be, granted under the terms of this License), and this + License will continue in full force and effect unless terminated as + stated above. + +8. Miscellaneous + + a. Each time You Distribute or Publicly Perform the Work or a Collection, + the Licensor offers to the recipient a license to the Work on the same + terms and conditions as the license granted to You under this License. + b. If any provision of this License is invalid or unenforceable under + applicable law, it shall not affect the validity or enforceability of + the remainder of the terms of this License, and without further action + by the parties to this agreement, such provision shall be reformed to + the minimum extent necessary to make such provision valid and + enforceable. + c. No term or provision of this License shall be deemed waived and no + breach consented to unless such waiver or consent shall be in writing + and signed by the party to be charged with such waiver or consent. + d. This License constitutes the entire agreement between the parties with + respect to the Work licensed here. There are no understandings, + agreements or representations with respect to the Work not specified + here. Licensor shall not be bound by any additional provisions that + may appear in any communication from You. This License may not be + modified without the mutual written agreement of the Licensor and You. + e. The rights granted under, and the subject matter referenced, in this + License were drafted utilizing the terminology of the Berne Convention + for the Protection of Literary and Artistic Works (as amended on + September 28, 1979), the Rome Convention of 1961, the WIPO Copyright + Treaty of 1996, the WIPO Performances and Phonograms Treaty of 1996 + and the Universal Copyright Convention (as revised on July 24, 1971). + These rights and subject matter take effect in the relevant + jurisdiction in which the License terms are sought to be enforced + according to the corresponding provisions of the implementation of + those treaty provisions in the applicable national law. If the + standard suite of rights granted under applicable copyright law + includes additional rights not granted under this License, such + additional rights are deemed to be included in the License; this + License is not intended to restrict the license of any rights under + applicable law. + + +Creative Commons Notice + + Creative Commons is not a party to this License, and makes no warranty + whatsoever in connection with the Work. Creative Commons will not be + liable to You or any party on any legal theory for any damages + whatsoever, including without limitation any general, special, + incidental or consequential damages arising in connection to this + license. Notwithstanding the foregoing two (2) sentences, if Creative + Commons has expressly identified itself as the Licensor hereunder, it + shall have all rights and obligations of Licensor. + + Except for the limited purpose of indicating to the public that the + Work is licensed under the CCPL, Creative Commons does not authorize + the use by either party of the trademark "Creative Commons" or any + related trademark or logo of Creative Commons without the prior + written consent of Creative Commons. Any permitted use will be in + compliance with Creative Commons' then-current trademark usage + guidelines, as may be published on its website or otherwise made + available upon request from time to time. For the avoidance of doubt, + this trademark restriction does not form part of this License. + + Creative Commons may be contacted at https://creativecommons.org/. \ No newline at end of file diff --git a/src/main/resources/cedict_1_0_ts_utf-8_mdbg.txt.gz b/src/main/resources/cedict_1_0_ts_utf-8_mdbg.txt.gz new file mode 100644 index 0000000..0f28151 Binary files /dev/null and b/src/main/resources/cedict_1_0_ts_utf-8_mdbg.txt.gz differ diff --git a/src/main/resources/cross-straits.csv.gz b/src/main/resources/cross-straits.csv.gz new file mode 100644 index 0000000..0914ff6 Binary files /dev/null and b/src/main/resources/cross-straits.csv.gz differ