Compare commits
No commits in common. "c6347a6b13fea83453ae133c5c3206af8a8f0f83" and "78020d6843b95b9de7077841065e7af1752d8d50" have entirely different histories.
c6347a6b13
...
78020d6843
@ -5,7 +5,7 @@ plugins {
|
|||||||
}
|
}
|
||||||
|
|
||||||
group = "com.marvinelsen"
|
group = "com.marvinelsen"
|
||||||
version = "2.0.0"
|
version = "1.0.1"
|
||||||
|
|
||||||
repositories {
|
repositories {
|
||||||
mavenCentral()
|
mavenCentral()
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
package com.marvinelsen.cedict.api
|
package com.marvinelsen.cedict.api
|
||||||
|
|
||||||
import com.marvinelsen.cedict.internal.RegexCedictParser
|
import com.marvinelsen.cedict.internal.CedictParserImpl
|
||||||
import java.io.InputStream
|
import java.io.InputStream
|
||||||
|
|
||||||
interface CedictParser {
|
interface CedictParser {
|
||||||
fun parseCedict(inputStream: InputStream): Sequence<CedictEntry>
|
fun parseCedict(inputStream: InputStream): List<CedictEntry>
|
||||||
|
|
||||||
companion object {
|
companion object {
|
||||||
val instance: CedictParser by lazy { RegexCedictParser() }
|
val instance: CedictParser by lazy { CedictParserImpl() }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -6,7 +6,7 @@ import com.marvinelsen.cedict.api.CedictParser
|
|||||||
import com.marvinelsen.chinese.transliteration.Syllable
|
import com.marvinelsen.chinese.transliteration.Syllable
|
||||||
import java.io.InputStream
|
import java.io.InputStream
|
||||||
|
|
||||||
internal class RegexCedictParser : CedictParser {
|
internal class CedictParserImpl : CedictParser {
|
||||||
companion object {
|
companion object {
|
||||||
private const val DEFINITION_SEPARATOR = '/'
|
private const val DEFINITION_SEPARATOR = '/'
|
||||||
private const val GLOSS_SEPARATOR = ';'
|
private const val GLOSS_SEPARATOR = ';'
|
||||||
@ -20,11 +20,11 @@ internal class RegexCedictParser : CedictParser {
|
|||||||
}
|
}
|
||||||
|
|
||||||
override fun parseCedict(inputStream: InputStream) =
|
override fun parseCedict(inputStream: InputStream) =
|
||||||
inputStream
|
inputStream.bufferedReader().useLines { lines ->
|
||||||
.bufferedReader()
|
lines.filterNot(::isComment)
|
||||||
.lineSequence()
|
|
||||||
.filterNot(::isComment)
|
|
||||||
.map(::toCedictEntry)
|
.map(::toCedictEntry)
|
||||||
|
.toList()
|
||||||
|
}
|
||||||
|
|
||||||
private fun isComment(line: String) = line[0] == COMMENT_MARKER
|
private fun isComment(line: String) = line[0] == COMMENT_MARKER
|
||||||
|
|
||||||
@ -48,5 +48,9 @@ internal class RegexCedictParser : CedictParser {
|
|||||||
|
|
||||||
private fun toCedictDefinitions(definitions: String) = definitions
|
private fun toCedictDefinitions(definitions: String) = definitions
|
||||||
.split(DEFINITION_SEPARATOR)
|
.split(DEFINITION_SEPARATOR)
|
||||||
.map { CedictDefinition(it.split(GLOSS_SEPARATOR).map(String::trim)) }
|
.map {
|
||||||
|
CedictDefinition(
|
||||||
|
glosses = it.split(GLOSS_SEPARATOR).map(String::trim)
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
@ -7,9 +7,9 @@ import io.kotest.core.spec.style.ShouldSpec
|
|||||||
import io.kotest.matchers.shouldBe
|
import io.kotest.matchers.shouldBe
|
||||||
import java.util.zip.GZIPInputStream
|
import java.util.zip.GZIPInputStream
|
||||||
|
|
||||||
class RegexCedictParserTest : ShouldSpec({
|
class CedictParserImplTest : ShouldSpec({
|
||||||
should("parse lines correctly") {
|
should("parse lines correctly") {
|
||||||
val cedictParser = RegexCedictParser()
|
val cedictParser = CedictParserImpl()
|
||||||
val cedictEntry =
|
val cedictEntry =
|
||||||
cedictParser.toCedictEntry("皮實 皮实 [pi2 shi5] /(of things) durable/(of people) sturdy; tough/")
|
cedictParser.toCedictEntry("皮實 皮实 [pi2 shi5] /(of things) durable/(of people) sturdy; tough/")
|
||||||
|
|
||||||
@ -29,12 +29,9 @@ class RegexCedictParserTest : ShouldSpec({
|
|||||||
val cedictFilePath = "/cedict_1_0_ts_utf-8_mdbg.txt.gz"
|
val cedictFilePath = "/cedict_1_0_ts_utf-8_mdbg.txt.gz"
|
||||||
val cedictFileStream = GZIPInputStream(javaClass.getResourceAsStream(cedictFilePath))
|
val cedictFileStream = GZIPInputStream(javaClass.getResourceAsStream(cedictFilePath))
|
||||||
|
|
||||||
val cedictParser = RegexCedictParser()
|
val cedictParser = CedictParserImpl()
|
||||||
|
|
||||||
cedictFileStream.use {
|
|
||||||
val cedictEntries = cedictParser.parseCedict(cedictFileStream)
|
val cedictEntries = cedictParser.parseCedict(cedictFileStream)
|
||||||
|
|
||||||
cedictEntries.toList().size shouldBe 122_508
|
cedictEntries.size shouldBe 122_508
|
||||||
}
|
|
||||||
}
|
}
|
||||||
})
|
})
|
Loading…
Reference in New Issue
Block a user