Nikolay Kasyanov
4 years ago
committed by
GitHub
4 changed files with 108 additions and 0 deletions
@ -0,0 +1,9 @@
|
||||
The MIT License (MIT) |
||||
|
||||
Copyright © 2021 Unbewohne | Nikolay Kasyanov |
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: |
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. |
||||
|
||||
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
@ -0,0 +1,22 @@
|
||||
# UnicodeCharsGenerator |
||||
## Ever seen some of these weird-looking Unicode characters on the Internet? Time to find more :) |
||||
|
||||
# Usage |
||||
## There are 2 flags to specify - `-inline` and `-limit` |
||||
`-inline` flag takes an integer. If it is positive, each line in the output file will contain the specified number of characters; otherwise (zero or negative) all characters are put on one line |
||||
`-limit` flag takes an integer. If it is positive, the output file will contain all Unicode code points from 0 up to and including the specified one; otherwise (zero or negative) there is no limit |
||||
|
||||
## Examples |
||||
|
||||
### `-inline` |
||||
1. `./UnicodeCharsGenerator -inline=20` - the generated output file will **try** to contain 20 characters in one line |
||||
2. `./UnicodeCharsGenerator -inline=-1` - all characters will be put in one line |
||||
3. `./UnicodeCharsGenerator` - as in 2., all characters will be put in one line |
||||
|
||||
### `-limit` |
||||
1. `./UnicodeCharsGenerator -limit=20000` - the generated output file will contain all Unicode characters up to code point 20000 (inclusive) |
||||
2. `./UnicodeCharsGenerator -limit=-727` - the limit is ignored and you will get **all** Unicode characters, up to code point U+10FFFF |
||||
|
||||
### general |
||||
1. `./UnicodeCharsGenerator` - all Unicode characters, all in one line |
||||
2. `./UnicodeCharsGenerator -limit=900 -inline=5` - code points 0 through 900, 5 characters per line |
@ -0,0 +1,74 @@
|
||||
package main |
||||
|
||||
import (
	"bufio"
	"flag"
	"os"
)
||||
|
||||
// Unicode has 143859 ("active"|"usable") characters
|
||||
// Unicode's code points go from U+0000 (0) to U+10FFFF (1114111)
|
||||
// These code points can be encoded with UTF-8, UTF-16 and UTF-32.
|
||||
// Most people use UTF-8 encoding daily that covers all necessary characters and many "exotic" ones )
|
||||
// That means that in order to "get" all the unicode characters we need to loop over all
|
||||
// codes (from 0 to 1114111) and convert each code into rune
|
||||
|
||||
// In order to read these runes correctly, we can use UTF-8, UTF-16 and other encodings
|
||||
// this is done by the text editor that you're using.
|
||||
|
||||
// This is new for me, so I can be wrong about something, I'm sorry if that's the case
|
||||
|
||||
// Sources
|
||||
// https://en.wikipedia.org/wiki/Unicode
|
||||
// https://en.wikipedia.org/wiki/List_of_Unicode_characters
|
||||
// https://www.unicode.org/faq/utf_bom.html
|
||||
// https://www.thoughtco.com/what-is-unicode-2034272
|
||||
// https://golangdocs.com/rune-in-golang
|
||||
|
||||
// generateUnicodeChars writes Unicode code points, in ascending order and
// UTF-8 encoded, to the file "Unicode.txt" in the current working directory.
// The file is created, or truncated if it already exists.
//
// runesInline is the number of characters written per line: once a line holds
// that many characters, a "\n" is written before the next one. A value <= 0
// disables wrapping, so no extra newlines are inserted (line-break characters
// that are themselves code points, such as U+000A, are still written).
//
// limit is the last code point to write, inclusive. A value <= 0, or one
// above U+10FFFF, means "no limit": every code point from U+0000 through
// U+10FFFF is written.
//
// Surrogate code points (U+D800..U+DFFF) have no UTF-8 encoding and are
// written as U+FFFD, the replacement character.
//
// The function panics if the file cannot be created, written or closed.
func generateUnicodeChars(runesInline int, limit int32) {
	const maxCodePoint = 0x10FFFF // highest Unicode code point (1114111)

	outputFile, err := os.Create("Unicode.txt")
	if err != nil {
		panic(err)
	}
	// Safety net for the panic paths below. The success path closes the file
	// explicitly so a close error is reported; the second Close is then a
	// harmless no-op error that is deliberately ignored.
	defer outputFile.Close()

	// Resolve the last code point to emit (inclusive).
	last := maxCodePoint
	if limit > 0 && int(limit) < maxCodePoint {
		last = int(limit)
	}

	// Buffer the output: writing each rune straight to the file would cost
	// one system call per code point.
	w := bufio.NewWriter(outputFile)

	onLine := 0 // characters written on the current line
	for cp := 0; cp <= last; cp++ {
		if runesInline > 0 && onLine == runesInline {
			// The line is full: start a new one.
			if err := w.WriteByte('\n'); err != nil {
				panic(err)
			}
			onLine = 0
		}
		if _, err := w.WriteRune(rune(cp)); err != nil {
			panic(err)
		}
		onLine++
	}

	if err := w.Flush(); err != nil {
		panic(err)
	}
	if err := outputFile.Close(); err != nil {
		panic(err)
	}
}
||||
|
||||
func main() { |
||||
var RUNESINLINE *int = flag.Int("inline", 0, "How many characters will be placed in one line in the output file before the \\n") |
||||
var LIMIT *int = flag.Int("limit", 0, "Set the limit") |
||||
flag.Parse() |
||||
|
||||
generateUnicodeChars(*RUNESINLINE, int32(*LIMIT)) |
||||
|
||||
} |
Loading…
Reference in new issue