Jim Tcl
Check-in [e4f3b0e5d4]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:utf8: Fix merging of adjacent wide character ranges

Adjacent wide character ranges were not being merged correctly, and the final range was not being output.

Fix this, and also merge adjacent combining character ranges.

Signed-off-by: Steve Bennett <steveb@workware.net.au>

Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1:e4f3b0e5d4a9a718e9df611a705902f4bdac50a8
User & Date: steveb@workware.net.au 2017-12-31 01:47:54
Original User & Date: steveb@workware.net.au 2017-12-31 01:47:55
Context
2017-12-31
01:47
utf8: Update UnicodeData.txt and EastAsianWidth.txt to 10.0.0

Signed-off-by: Steve Bennett <steveb@workware.net.au> check-in: 5386172c24 user: steveb@workware.net.au tags: trunk

01:47
utf8: Fix merging of adjacent wide character ranges

Adjacent wide character ranges were not being merged correctly, and the final range was not being output.

Fix this, and also merge adjacent combining character ranges.

Signed-off-by: Steve Bennett <steveb@workware.net.au> check-in: e4f3b0e5d4 user: steveb@workware.net.au tags: trunk

01:47
linenoise: Update to fix potential buffer overflow

From https://github.com/msteveb/linenoise/commit/a4545af5e3766c58100be6bf406b9a0d2049090f

Signed-off-by: Steve Bennett <steveb@workware.net.au> check-in: 29cb9ccf5a user: steveb@workware.net.au tags: trunk

Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to parse-unidata.tcl.

80
81
82
83
84
85
86





















87
88
89
90
91
92
93
94
95
96
97

98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
		}
	}
	if {$n % 4} {
		puts ""
	}
}























# Emit one C array of struct casemap per mapping kind (upper, lower, title),
# taking the integer pairs for each kind from the map array
foreach type {upper lower title} {
	puts "static const struct casemap unicode_case_mapping_$type\[\] = \{"
	output-int-pairs $map($type)
	puts "\};\n"
}

# When width tables are requested, scan the width file for entries of the
# form "range;W" and build map(wide) as a flat list of lower/upper pairs,
# attempting to merge adjacent ranges while reading.
if {$do_width} {
	set f [open $widthfile]
	while {[gets $f buf] >= 0} {
		# Only upper-case hex digits are accepted by this pattern
		if {[regexp {^([0-9A-F.]+);W} $buf -> range]} {

			# A range is "lower..upper"; a single code point leaves upper empty
			lassign [split $range .] lower - upper
			if {$upper eq ""} {
				set upper $lower
			}
			set lower 0x$lower
			set upper 0x$upper
			if {[info exists endrange]} {
				# NOTE(review): this compares the new range's upper bound with
				# endrange + 1. A range that starts right after endrange has
				# lower == endrange + 1, so multi-code-point adjacent ranges
				# do not take this branch.
				if {$upper == $endrange + 1} {
					# Just extend the range
					set endrange $upper
					continue
				}
				# Flush the pending range before starting a new one
				lappend map(wide) $startrange $endrange
			}
			set startrange $lower
			set endrange $upper
		}
	}
	# NOTE(review): the pending startrange/endrange pair is not appended to
	# map(wide) after the loop, so the last range read is not recorded.
	close $f
}

# Emit the combining and wide range tables as C arrays of struct utf8range
foreach type {combining wide} {
	puts "static const struct utf8range unicode_range_$type\[\] = \{"
	if {$do_width} {
		# The pairs are written out exactly as stored in the map array
		output-int-pairs $map($type)
	} else {
		# Just produce empty width tables in this case
		output-int-pairs {}
	}
	puts "\};\n"
}







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>










|
>




<
<
<
<
<
<
<
<
|
<
<
<








|






80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123








124



125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
		}
	}
	if {$n % 4} {
		puts ""
	}
}

# Merges adjacent ranges in a list of ranges (lower upper lower upper ...)
#
# Two consecutive ranges are merged when the second one starts exactly one
# past the end of the first (lower == previous upper + 1). Ranges are
# examined in the order given; ranges that overlap or are out of order are
# passed through unchanged.
#
# list    - flat list of integer pairs: lower upper lower upper ...
#
# Returns a new flat list of pairs with adjacent ranges combined.
# An empty list yields an empty list.
proc combine-adjacent-ranges {list} {
	set newlist {}
	foreach {lower upper} $list {
		if {[info exists prev_upper]} {
			if {$lower == $prev_upper + 1} {
				# Adjacent: grow the pending range instead of emitting it
				set prev_upper $upper
				continue
			}
			# Not adjacent: the pending range is complete
			lappend newlist $prev_lower $prev_upper
		}
		set prev_lower $lower
		set prev_upper $upper
	}
	# Emit the last pending range, if any. With an empty input list the loop
	# body never runs and there is nothing pending to emit.
	if {[info exists prev_upper]} {
		lappend newlist $prev_lower $prev_upper
	}
	return $newlist
}

# Emit one C array of struct casemap per mapping kind (upper, lower, title),
# taking the integer pairs for each kind from the map array
foreach type {upper lower title} {
	puts "static const struct casemap unicode_case_mapping_$type\[\] = \{"
	output-int-pairs $map($type)
	puts "\};\n"
}

# When width tables are requested, collect every "range;W" entry of the
# width file into map(wide) as a lower/upper pair of hex literals.
# Adjacent pairs are not merged at this stage.
if {$do_width} {
	set f [open $widthfile]
	while {[gets $f buf] >= 0} {
		# Ignore every line that is not a wide-range entry
		if {![regexp {^([0-9A-Fa-f.]+);W} $buf -> range]} {
			continue
		}
		set range [string tolower $range]
		# A range is "lower..upper"; a single code point leaves upper empty
		lassign [split $range .] lower - upper
		if {$upper eq ""} {
			set upper $lower
		}
		lappend map(wide) 0x$lower 0x$upper
	}
	close $f
}

# Emit the combining and wide range tables as C arrays of struct utf8range
foreach type {combining wide} {
	puts "static const struct utf8range unicode_range_$type\[\] = \{"
	if {$do_width} {
		# Adjacent ranges are merged just before the pairs are written out
		output-int-pairs [combine-adjacent-ranges $map($type)]
	} else {
		# Just produce empty width tables in this case
		output-int-pairs {}
	}
	puts "\};\n"
}