\relax
% Document preamble and abstract, followed by the start of the encoding
% proper: the dotless letters in slots 25--26 and the capital letters
% A--Z in slots 65--90.
\documentclass[twocolumn]{article}
\usepackage[specification]{fontdoc}[2004/11/28]
\usepackage{shortvrb}
\MakeShortVerb{\|}
\begin{document}
\title{\texttt{T1} encoding draft specification}
\author{Lars Hellstr\"om}
\date{2002/07/05}
\maketitle
\begin{abstract}
The \texttt{T1} encoding covers most\footnote{But not quite all.}
letters in the latin script that are used in European languages.
Code points 32--126 coincide with ASCII\footnote{As defined in
Appendix C of \emph{The \TeX book}~\cite{TeXbook}, not as defined by
the ANSI.} and code points 192--255 are mainly the same as in the
latin-1 encoding~\cite{ISO8859-1,latin-page}, but there are exceptions.
\end{abstract}
\encoding
\needsfontinstversion{1.928}
\begincomment
\section{Mandatory characters}
\endcomment
\begincomment
\subsection{Letters}
More than two thirds of the characters in the encoding are letters.
This is a deliberate choice: \TeX's hyphenation algorithm requires
that all characters in a word are taken from the same font, hence it
is advantageous\footnote{Probably less so than what was thought when
the encoding was designed, but still advantageous.} to keep as many
letters as possible in the same font. Taking symbols from other fonts
is less of a disadvantage.
\subsubsection{``Unaccented'' letters in slots 0--127}
\endcomment
\nextslot{25}
\setslot{dotlessi}
   \Unicode{0131}{LATIN SMALL LETTER DOTLESS I}
   \comment{A dotless i `\i', used to produce accented letters such as
      `\=\i'.}
\endsetslot
\setslot{dotlessj}
   \Unicode{F6BE}{LATIN SMALL LETTER DOTLESS J}
   \comment{A dotless j `\j', used to produce accented letters such as
      `\=\j'. The Unicode standard does not define this character, but
      Adobe has assigned code point \texttt{U+F6BE} (which lies in the
      private use subarea assigned by Adobe) to it.}
\endsetslot
\nextslot{65}
\setslot{A}
   \Unicode{0041}{LATIN CAPITAL LETTER A}
\endsetslot
\setslot{B}
   \Unicode{0042}{LATIN CAPITAL LETTER B}
\endsetslot
\setslot{C}
   \Unicode{0043}{LATIN CAPITAL LETTER C}
\endsetslot
\setslot{D}
   \Unicode{0044}{LATIN CAPITAL LETTER D}
\endsetslot
\setslot{E}
   \Unicode{0045}{LATIN CAPITAL LETTER E}
\endsetslot
\setslot{F}
   \Unicode{0046}{LATIN CAPITAL LETTER F}
\endsetslot
\setslot{G}
   \Unicode{0047}{LATIN CAPITAL LETTER G}
\endsetslot
\setslot{H}
   \Unicode{0048}{LATIN CAPITAL LETTER H}
\endsetslot
\setslot{I}
   \Unicode{0049}{LATIN CAPITAL LETTER I}
   % Only the all-capitals pair I+J forms the capital IJ digraph. A
   % mixed-case I+j is deliberately not ligatured, since that would
   % silently change the case of the second letter.
   \oddligature{This ligature only occurs in Dutch typography.}
      {LIG}{J}{IJ}
\endsetslot
\setslot{J}
   \Unicode{004A}{LATIN CAPITAL LETTER J}
\endsetslot
\setslot{K}
   \Unicode{004B}{LATIN CAPITAL LETTER K}
\endsetslot
\setslot{L}
   \Unicode{004C}{LATIN CAPITAL LETTER L}
\endsetslot
\setslot{M}
   \Unicode{004D}{LATIN CAPITAL LETTER M}
\endsetslot
\setslot{N}
   \Unicode{004E}{LATIN CAPITAL LETTER N}
\endsetslot
\setslot{O}
   \Unicode{004F}{LATIN CAPITAL LETTER O}
\endsetslot
\setslot{P}
   \Unicode{0050}{LATIN CAPITAL LETTER P}
\endsetslot
\setslot{Q}
   \Unicode{0051}{LATIN CAPITAL LETTER Q}
\endsetslot
\setslot{R}
   \Unicode{0052}{LATIN CAPITAL LETTER R}
\endsetslot
\setslot{S}
   \Unicode{0053}{LATIN CAPITAL LETTER S}
\endsetslot
\setslot{T}
   \Unicode{0054}{LATIN CAPITAL LETTER T}
\endsetslot
\setslot{U}
   \Unicode{0055}{LATIN CAPITAL LETTER U}
\endsetslot
\setslot{V}
   \Unicode{0056}{LATIN CAPITAL LETTER V}
\endsetslot
\setslot{W}
   \Unicode{0057}{LATIN CAPITAL LETTER W}
\endsetslot
\setslot{X}
   \Unicode{0058}{LATIN CAPITAL LETTER X}
\endsetslot
\setslot{Y}
   \Unicode{0059}{LATIN CAPITAL LETTER Y}
\endsetslot
\setslot{Z}
   \Unicode{005A}{LATIN CAPITAL LETTER Z}
\endsetslot
\nextslot{97}
% The small letters a--z, then the first two slots of the 128--191 range.
\setslot{a}
   \Unicode{0061}{LATIN SMALL LETTER A}
\endsetslot
\setslot{b}
   \Unicode{0062}{LATIN SMALL LETTER B}
\endsetslot
\setslot{c}
   \Unicode{0063}{LATIN SMALL LETTER C}
\endsetslot
\setslot{d}
   \Unicode{0064}{LATIN SMALL LETTER D}
\endsetslot
\setslot{e}
   \Unicode{0065}{LATIN SMALL LETTER E}
\endsetslot
\setslot{f}
   \Unicode{0066}{LATIN SMALL LETTER F}
   \ligature{LIG}{f}{ff}
   \ligature{LIG}{i}{fi}
   \ligature{LIG}{l}{fl}
\endsetslot
\setslot{g}
   \Unicode{0067}{LATIN SMALL LETTER G}
\endsetslot
\setslot{h}
   \Unicode{0068}{LATIN SMALL LETTER H}
\endsetslot
\setslot{i}
   \Unicode{0069}{LATIN SMALL LETTER I}
   \oddligature{This ligature only occurs in Dutch typography.}
      {LIG}{j}{ij}
\endsetslot
\setslot{j}
   \Unicode{006A}{LATIN SMALL LETTER J}
\endsetslot
\setslot{k}
   \Unicode{006B}{LATIN SMALL LETTER K}
\endsetslot
\setslot{l}
   \Unicode{006C}{LATIN SMALL LETTER L}
\endsetslot
\setslot{m}
   \Unicode{006D}{LATIN SMALL LETTER M}
\endsetslot
\setslot{n}
   \Unicode{006E}{LATIN SMALL LETTER N}
\endsetslot
\setslot{o}
   \Unicode{006F}{LATIN SMALL LETTER O}
\endsetslot
\setslot{p}
   \Unicode{0070}{LATIN SMALL LETTER P}
\endsetslot
\setslot{q}
   \Unicode{0071}{LATIN SMALL LETTER Q}
\endsetslot
\setslot{r}
   \Unicode{0072}{LATIN SMALL LETTER R}
\endsetslot
\setslot{s}
   \Unicode{0073}{LATIN SMALL LETTER S}
\endsetslot
\setslot{t}
   \Unicode{0074}{LATIN SMALL LETTER T}
\endsetslot
\setslot{u}
   \Unicode{0075}{LATIN SMALL LETTER U}
\endsetslot
\setslot{v}
   \Unicode{0076}{LATIN SMALL LETTER V}
\endsetslot
\setslot{w}
   \Unicode{0077}{LATIN SMALL LETTER W}
\endsetslot
\setslot{x}
   \Unicode{0078}{LATIN SMALL LETTER X}
\endsetslot
\setslot{y}
   \Unicode{0079}{LATIN SMALL LETTER Y}
\endsetslot
\setslot{z}
   \Unicode{007A}{LATIN SMALL LETTER Z}
\endsetslot
\begincomment
\subsubsection{Mainly East-European accented letters in slots 128--191}
\endcomment
\nextslot{128}
\setslot{Abreve}
   \Unicode{0102}{LATIN CAPITAL LETTER A WITH BREVE}
\endsetslot
\setslot{Aogonek}
   \Unicode{0104}{LATIN CAPITAL LETTER A WITH OGONEK}
\endsetslot
% Accented letters: the rest of the mainly East-European block, then the
% mainly West-European block up to and including ocircumflex.
\setslot{Cacute}
   \Unicode{0106}{LATIN CAPITAL LETTER C WITH ACUTE}
\endsetslot
\setslot{Ccaron}
   \Unicode{010C}{LATIN CAPITAL LETTER C WITH CARON}
\endsetslot
\setslot{Dcaron}
   \Unicode{010E}{LATIN CAPITAL LETTER D WITH CARON}
\endsetslot
\setslot{Ecaron}
   \Unicode{011A}{LATIN CAPITAL LETTER E WITH CARON}
\endsetslot
\setslot{Eogonek}
   \Unicode{0118}{LATIN CAPITAL LETTER E WITH OGONEK}
\endsetslot
\setslot{Gbreve}
   \Unicode{011E}{LATIN CAPITAL LETTER G WITH BREVE}
\endsetslot
\setslot{Lacute}
   \Unicode{0139}{LATIN CAPITAL LETTER L WITH ACUTE}
\endsetslot
\setslot{Lcaron}
   \Unicode{013D}{LATIN CAPITAL LETTER L WITH CARON}
   \comment{The normal glyph for this character consists of a capital L
      and a right single quote, but the underlying accent is a caron.
      The glyph name \texttt{Lquoteright} is not uncommon, but
      \cite{AGL} uses \texttt{Lcaron}.}
\endsetslot
\setslot{Lslash}
   \Unicode{0141}{LATIN CAPITAL LETTER L WITH STROKE}
   \comment{The letter `\L'.}
\endsetslot
\setslot{Nacute}
   \Unicode{0143}{LATIN CAPITAL LETTER N WITH ACUTE}
\endsetslot
\setslot{Ncaron}
   \Unicode{0147}{LATIN CAPITAL LETTER N WITH CARON}
\endsetslot
\setslot{Eng}
   \Unicode{014A}{LATIN CAPITAL LETTER ENG}
\endsetslot
\setslot{Ohungarumlaut}
   \Unicode{0150}{LATIN CAPITAL LETTER O WITH DOUBLE ACUTE}
   \comment{The letter `\H{O}'.}
\endsetslot
\setslot{Racute}
   \Unicode{0154}{LATIN CAPITAL LETTER R WITH ACUTE}
\endsetslot
\setslot{Rcaron}
   \Unicode{0158}{LATIN CAPITAL LETTER R WITH CARON}
\endsetslot
\setslot{Sacute}
   \Unicode{015A}{LATIN CAPITAL LETTER S WITH ACUTE}
\endsetslot
\setslot{Scaron}
   \Unicode{0160}{LATIN CAPITAL LETTER S WITH CARON}
\endsetslot
\setslot{Scedilla}
   \Unicode{015E}{LATIN CAPITAL LETTER S WITH CEDILLA}
   \comment{In Romanian, character \textunicode{0218}{latin capital
      letter s with comma below} is preferred, but that wasn't known
      when this encoding was designed.}
\endsetslot
\setslot{Tcaron}
   \Unicode{0164}{LATIN CAPITAL LETTER T WITH CARON}
\endsetslot
\setslot{Tcommaaccent}
   \Unicode{021A}{LATIN CAPITAL LETTER T WITH COMMA BELOW}
\endsetslot
\setslot{Uhungarumlaut}
   \Unicode{0170}{LATIN CAPITAL LETTER U WITH DOUBLE ACUTE}
   \comment{The letter `\H{U}'.}
\endsetslot
\setslot{Uring}
   \Unicode{016E}{LATIN CAPITAL LETTER U WITH RING ABOVE}
\endsetslot
\setslot{Ydieresis}
   \Unicode{0178}{LATIN CAPITAL LETTER Y WITH DIAERESIS}
\endsetslot
\setslot{Zacute}
   \Unicode{0179}{LATIN CAPITAL LETTER Z WITH ACUTE}
\endsetslot
\setslot{Zcaron}
   \Unicode{017D}{LATIN CAPITAL LETTER Z WITH CARON}
\endsetslot
\setslot{Zdotaccent}
   \Unicode{017B}{LATIN CAPITAL LETTER Z WITH DOT ABOVE}
\endsetslot
\skipslots{1}
\setslot{Idotaccent}
   \Unicode{0130}{LATIN CAPITAL LETTER I WITH DOT ABOVE}
   \comment{This character's |\lccode| value points to slot 105
      (\texttt{i}), as does that of slot 73 (\texttt{I}).}
\endsetslot
\setslot{dcroat}
   \Unicode{0111}{LATIN SMALL LETTER D WITH STROKE}
\endsetslot
\nextslot{160}
\setslot{abreve}
   \Unicode{0103}{LATIN SMALL LETTER A WITH BREVE}
\endsetslot
\setslot{aogonek}
   \Unicode{0105}{LATIN SMALL LETTER A WITH OGONEK}
\endsetslot
\setslot{cacute}
   \Unicode{0107}{LATIN SMALL LETTER C WITH ACUTE}
\endsetslot
\setslot{ccaron}
   \Unicode{010D}{LATIN SMALL LETTER C WITH CARON}
\endsetslot
\setslot{dcaron}
   \Unicode{010F}{LATIN SMALL LETTER D WITH CARON}
   \comment{The normal glyph for this character consists of a lower
      case d and a right single quote, but the underlying accent is a
      caron.}
\endsetslot
\setslot{ecaron}
   \Unicode{011B}{LATIN SMALL LETTER E WITH CARON}
\endsetslot
\setslot{eogonek}
   \Unicode{0119}{LATIN SMALL LETTER E WITH OGONEK}
\endsetslot
\setslot{gbreve}
   \Unicode{011F}{LATIN SMALL LETTER G WITH BREVE}
\endsetslot
\setslot{lacute}
   \Unicode{013A}{LATIN SMALL LETTER L WITH ACUTE}
\endsetslot
\setslot{lcaron}
   \Unicode{013E}{LATIN SMALL LETTER L WITH CARON}
   \comment{The normal glyph for this character consists of a lower
      case l and a right single quote, but the underlying accent is a
      caron. The glyph name \texttt{lquoteright} is not uncommon, but
      \cite{AGL} uses \texttt{lcaron}.}
\endsetslot
\setslot{lslash}
   \Unicode{0142}{LATIN SMALL LETTER L WITH STROKE}
\endsetslot
\setslot{nacute}
   \Unicode{0144}{LATIN SMALL LETTER N WITH ACUTE}
\endsetslot
\setslot{ncaron}
   \Unicode{0148}{LATIN SMALL LETTER N WITH CARON}
\endsetslot
\setslot{eng}
   \Unicode{014B}{LATIN SMALL LETTER ENG}
\endsetslot
\setslot{ohungarumlaut}
   \Unicode{0151}{LATIN SMALL LETTER O WITH DOUBLE ACUTE}
   \comment{The letter `\H{o}'.}
\endsetslot
\setslot{racute}
   \Unicode{0155}{LATIN SMALL LETTER R WITH ACUTE}
\endsetslot
\setslot{rcaron}
   \Unicode{0159}{LATIN SMALL LETTER R WITH CARON}
\endsetslot
\setslot{sacute}
   \Unicode{015B}{LATIN SMALL LETTER S WITH ACUTE}
\endsetslot
\setslot{scaron}
   \Unicode{0161}{LATIN SMALL LETTER S WITH CARON}
\endsetslot
\setslot{scedilla}
   \Unicode{015F}{LATIN SMALL LETTER S WITH CEDILLA}
   \comment{In Romanian, character \textunicode{0219}{latin small
      letter s with comma below} is preferred, but that wasn't known
      when this encoding was designed.}
\endsetslot
\setslot{tcaron}
   \Unicode{0165}{LATIN SMALL LETTER T WITH CARON}
   \comment{The normal glyph for this character consists of a lower
      case t and a right single quote, but the underlying accent is a
      caron.}
\endsetslot
\setslot{tcommaaccent}
   \Unicode{021B}{LATIN SMALL LETTER T WITH COMMA BELOW}
\endsetslot
\setslot{uhungarumlaut}
   \Unicode{0171}{LATIN SMALL LETTER U WITH DOUBLE ACUTE}
\endsetslot
\setslot{uring}
   \Unicode{016F}{LATIN SMALL LETTER U WITH RING ABOVE}
\endsetslot
\setslot{ydieresis}
   \Unicode{00FF}{LATIN SMALL LETTER Y WITH DIAERESIS}
\endsetslot
\setslot{zacute}
   \Unicode{017A}{LATIN SMALL LETTER Z WITH ACUTE}
\endsetslot
\setslot{zcaron}
   \Unicode{017E}{LATIN SMALL LETTER Z WITH CARON}
\endsetslot
\setslot{zdotaccent}
   \Unicode{017C}{LATIN SMALL LETTER Z WITH DOT ABOVE}
\endsetslot
\begincomment
\subsubsection{Mainly West-European accented letters in slots 192--255}
\endcomment
\nextslot{192}
\setslot{Agrave}
   \Unicode{00C0}{LATIN CAPITAL LETTER A WITH GRAVE}
\endsetslot
\setslot{Aacute}
   \Unicode{00C1}{LATIN CAPITAL LETTER A WITH ACUTE}
\endsetslot
\setslot{Acircumflex}
   \Unicode{00C2}{LATIN CAPITAL LETTER A WITH CIRCUMFLEX}
\endsetslot
\setslot{Atilde}
   \Unicode{00C3}{LATIN CAPITAL LETTER A WITH TILDE}
\endsetslot
\setslot{Adieresis}
   \Unicode{00C4}{LATIN CAPITAL LETTER A WITH DIAERESIS}
\endsetslot
\setslot{Aring}
   \Unicode{00C5}{LATIN CAPITAL LETTER A WITH RING ABOVE}
\endsetslot
\setslot{AE}
   \Unicode{00C6}{LATIN CAPITAL LETTER AE}
\endsetslot
\setslot{Ccedilla}
   \Unicode{00C7}{LATIN CAPITAL LETTER C WITH CEDILLA}
\endsetslot
\setslot{Egrave}
   \Unicode{00C8}{LATIN CAPITAL LETTER E WITH GRAVE}
\endsetslot
\setslot{Eacute}
   \Unicode{00C9}{LATIN CAPITAL LETTER E WITH ACUTE}
\endsetslot
\setslot{Ecircumflex}
   \Unicode{00CA}{LATIN CAPITAL LETTER E WITH CIRCUMFLEX}
\endsetslot
\setslot{Edieresis}
   \Unicode{00CB}{LATIN CAPITAL LETTER E WITH DIAERESIS}
\endsetslot
\setslot{Igrave}
   \Unicode{00CC}{LATIN CAPITAL LETTER I WITH GRAVE}
\endsetslot
\setslot{Iacute}
   \Unicode{00CD}{LATIN CAPITAL LETTER I WITH ACUTE}
\endsetslot
\setslot{Icircumflex}
   \Unicode{00CE}{LATIN CAPITAL LETTER I WITH CIRCUMFLEX}
\endsetslot
\setslot{Idieresis}
   \Unicode{00CF}{LATIN CAPITAL LETTER I WITH DIAERESIS}
\endsetslot
\setslot{Eth}
   \Unicode{00D0}{LATIN CAPITAL LETTER ETH}
   \Unicode{0110}{LATIN CAPITAL LETTER D WITH STROKE}
   \comment{This slot is being used to represent two different letters
      whose capital forms are identical. The |\lccode| value points to
      slot \ref{eth}.}
\endsetslot
\setslot{Ntilde}
   \Unicode{00D1}{LATIN CAPITAL LETTER N WITH TILDE}
\endsetslot
\setslot{Ograve}
   \Unicode{00D2}{LATIN CAPITAL LETTER O WITH GRAVE}
\endsetslot
\setslot{Oacute}
   \Unicode{00D3}{LATIN CAPITAL LETTER O WITH ACUTE}
\endsetslot
\setslot{Ocircumflex}
   \Unicode{00D4}{LATIN CAPITAL LETTER O WITH CIRCUMFLEX}
\endsetslot
\setslot{Otilde}
   \Unicode{00D5}{LATIN CAPITAL LETTER O WITH TILDE}
\endsetslot
\setslot{Odieresis}
   \Unicode{00D6}{LATIN CAPITAL LETTER O WITH DIAERESIS}
\endsetslot
\setslot{OE}
   \Unicode{0152}{LATIN CAPITAL LIGATURE OE}
   \comment{This is a single letter, and should not be faked with `OE'.}
\endsetslot
\setslot{Oslash}
   \Unicode{00D8}{LATIN CAPITAL LETTER O WITH STROKE}
\endsetslot
\setslot{Ugrave}
   \Unicode{00D9}{LATIN CAPITAL LETTER U WITH GRAVE}
\endsetslot
\setslot{Uacute}
   \Unicode{00DA}{LATIN CAPITAL LETTER U WITH ACUTE}
\endsetslot
\setslot{Ucircumflex}
   \Unicode{00DB}{LATIN CAPITAL LETTER U WITH CIRCUMFLEX}
\endsetslot
\setslot{Udieresis}
   \Unicode{00DC}{LATIN CAPITAL LETTER U WITH DIAERESIS}
\endsetslot
\setslot{Yacute}
   \Unicode{00DD}{LATIN CAPITAL LETTER Y WITH ACUTE}
\endsetslot
\setslot{Thorn}
   \Unicode{00DE}{LATIN CAPITAL LETTER THORN}
\endsetslot
\setslot{SS}
   \comment{This is an ``uppercased'' sharp s (`\ss') which always
      looks exactly like two S's next to each other. In a monowidth
      font it should be two letters wide. There are other characters
      which could have made better use of this slot; the only function
      of this slot which could not have been handled using macros is
      that |\SS| and |SS| can have different hyphenation.}
   \label{SS}
   \charseq{
      \Unicode{0053}{LATIN CAPITAL LETTER S}
      \Unicode{0053}{LATIN CAPITAL LETTER S}
   }
\endsetslot
\setslot{agrave}
   \Unicode{00E0}{LATIN SMALL LETTER A WITH GRAVE}
\endsetslot
\setslot{aacute}
   \Unicode{00E1}{LATIN SMALL LETTER A WITH ACUTE}
\endsetslot
\setslot{acircumflex}
   \Unicode{00E2}{LATIN SMALL LETTER A WITH CIRCUMFLEX}
\endsetslot
\setslot{atilde}
   \Unicode{00E3}{LATIN SMALL LETTER A WITH TILDE}
\endsetslot
\setslot{adieresis}
   \Unicode{00E4}{LATIN SMALL LETTER A WITH DIAERESIS}
\endsetslot
\setslot{aring}
   \Unicode{00E5}{LATIN SMALL LETTER A WITH RING ABOVE}
\endsetslot
\setslot{ae}
   \Unicode{00E6}{LATIN SMALL LETTER AE}
\endsetslot
\setslot{ccedilla}
   \Unicode{00E7}{LATIN SMALL LETTER C WITH CEDILLA}
\endsetslot
\setslot{egrave}
   \Unicode{00E8}{LATIN SMALL LETTER E WITH GRAVE}
\endsetslot
\setslot{eacute}
   \Unicode{00E9}{LATIN SMALL LETTER E WITH ACUTE}
\endsetslot
\setslot{ecircumflex}
   \Unicode{00EA}{LATIN SMALL LETTER E WITH CIRCUMFLEX}
\endsetslot
\setslot{edieresis}
   \Unicode{00EB}{LATIN SMALL LETTER E WITH DIAERESIS}
\endsetslot
\setslot{igrave}
   \Unicode{00EC}{LATIN SMALL LETTER I WITH GRAVE}
\endsetslot
\setslot{iacute}
   \Unicode{00ED}{LATIN SMALL LETTER I WITH ACUTE}
\endsetslot
\setslot{icircumflex}
   \Unicode{00EE}{LATIN SMALL LETTER I WITH CIRCUMFLEX}
\endsetslot
\setslot{idieresis}
   \Unicode{00EF}{LATIN SMALL LETTER I WITH DIAERESIS}
\endsetslot
\setslot{eth}
   \Unicode{00F0}{LATIN SMALL LETTER ETH}
   \label{eth}
\endsetslot
\setslot{ntilde}
   \Unicode{00F1}{LATIN SMALL LETTER N WITH TILDE}
\endsetslot
\setslot{ograve}
   \Unicode{00F2}{LATIN SMALL LETTER O WITH GRAVE}
\endsetslot
\setslot{oacute}
   \Unicode{00F3}{LATIN SMALL LETTER O WITH ACUTE}
\endsetslot
\setslot{ocircumflex}
   \Unicode{00F4}{LATIN SMALL LETTER O WITH CIRCUMFLEX}
\endsetslot
% Remaining small letters of the West-European block, then the accents,
% the digits, and the first part of the symbols and punctuation.
\setslot{otilde}
   \Unicode{00F5}{LATIN SMALL LETTER O WITH TILDE}
\endsetslot
\setslot{odieresis}
   \Unicode{00F6}{LATIN SMALL LETTER O WITH DIAERESIS}
\endsetslot
\setslot{oe}
   \Unicode{0153}{LATIN SMALL LIGATURE OE}
   \comment{This is a single letter, and should not be faked with `oe'.}
\endsetslot
\setslot{oslash}
   \Unicode{00F8}{LATIN SMALL LETTER O WITH STROKE}
\endsetslot
\setslot{ugrave}
   \Unicode{00F9}{LATIN SMALL LETTER U WITH GRAVE}
\endsetslot
\setslot{uacute}
   \Unicode{00FA}{LATIN SMALL LETTER U WITH ACUTE}
\endsetslot
\setslot{ucircumflex}
   \Unicode{00FB}{LATIN SMALL LETTER U WITH CIRCUMFLEX}
\endsetslot
\setslot{udieresis}
   \Unicode{00FC}{LATIN SMALL LETTER U WITH DIAERESIS}
\endsetslot
\setslot{yacute}
   \Unicode{00FD}{LATIN SMALL LETTER Y WITH ACUTE}
\endsetslot
\setslot{thorn}
   \Unicode{00FE}{LATIN SMALL LETTER THORN}
\endsetslot
\setslot{germandbls}
   \Unicode{00DF}{LATIN SMALL LETTER SHARP S}
\endsetslot
\begincomment
\subsection{Accents}
The Unicode equivalents of the accents are taken to be the combining
forms of the accents found in code point \texttt{U+0300} and upwards.
The reasons for this are (i) that they are primarily used with the
|\accent| primitive, i.e., as combining characters, (ii) that some of
them (e.g. \texttt{tilde}) have non-combining counterparts elsewhere
in the encoding, and (iii) that they are not primarily phonetic marks.
\endcomment
\nextslot{0}
\setslot{grave}
   \Unicode{0300}{COMBINING GRAVE ACCENT}
\endsetslot
\setslot{acute}
   \Unicode{0301}{COMBINING ACUTE ACCENT}
\endsetslot
\setslot{circumflex}
   \Unicode{0302}{COMBINING CIRCUMFLEX ACCENT}
\endsetslot
\setslot{tilde}
   \Unicode{0303}{COMBINING TILDE}
\endsetslot
\setslot{dieresis}
   \Unicode{0308}{COMBINING DIAERESIS}
\endsetslot
\setslot{hungarumlaut}
   \Unicode{030B}{COMBINING DOUBLE ACUTE ACCENT}
   \comment{The long Hungarian umlaut `\H{}'.}
\endsetslot
\setslot{ring}
   \Unicode{030A}{COMBINING RING ABOVE}
\endsetslot
\setslot{caron}
   \Unicode{030C}{COMBINING CARON}
   \comment{The caron or h\'a\v cek accent `\v{}'.}
\endsetslot
\setslot{breve}
   \Unicode{0306}{COMBINING BREVE}
\endsetslot
\setslot{macron}
   \Unicode{0304}{COMBINING MACRON}
\endsetslot
\setslot{dotaccent}
   \Unicode{0307}{COMBINING DOT ABOVE}
\endsetslot
\setslot{cedilla}
   \Unicode{0327}{COMBINING CEDILLA}
\endsetslot
\setslot{ogonek}
   \Unicode{0328}{COMBINING OGONEK}
\endsetslot
\begincomment
\subsection{Digits}
\endcomment
\nextslot{48}
\setslot{zero}
   \Unicode{0030}{DIGIT ZERO}
\endsetslot
\setslot{one}
   \Unicode{0031}{DIGIT ONE}
\endsetslot
\setslot{two}
   \Unicode{0032}{DIGIT TWO}
\endsetslot
\setslot{three}
   \Unicode{0033}{DIGIT THREE}
\endsetslot
\setslot{four}
   \Unicode{0034}{DIGIT FOUR}
\endsetslot
\setslot{five}
   \Unicode{0035}{DIGIT FIVE}
\endsetslot
\setslot{six}
   \Unicode{0036}{DIGIT SIX}
\endsetslot
\setslot{seven}
   \Unicode{0037}{DIGIT SEVEN}
\endsetslot
\setslot{eight}
   \Unicode{0038}{DIGIT EIGHT}
\endsetslot
\setslot{nine}
   \Unicode{0039}{DIGIT NINE}
\endsetslot
\begincomment
\subsection{Symbols and punctuation}
\endcomment
\nextslot{13}
\setslot{quotesinglbase}
   \Unicode{201A}{SINGLE LOW-9 QUOTATION MARK}
\endsetslot
\setslot{guilsinglleft}
   \Unicode{2039}{SINGLE LEFT-POINTING ANGLE QUOTATION MARK}
   \comment{In French typography this is an opening quotation mark,
      but in German typography it is a closing quotation mark. The
      character should kern well in both cases.}
\endsetslot
\setslot{guilsinglright}
   \Unicode{203A}{SINGLE RIGHT-POINTING ANGLE QUOTATION MARK}
   \comment{In French typography this is a closing quotation mark,
      but in German typography it is an opening quotation mark. The
      character should kern well in both cases.}
\endsetslot
\setslot{quotedblleft}
   \Unicode{201C}{LEFT DOUBLE QUOTATION MARK}
\endsetslot
\setslot{quotedblright}
   \Unicode{201D}{RIGHT DOUBLE QUOTATION MARK}
   \comment{In English typography this quotation mark is always
      closing, but in Swedish typography it is also an opening
      quotation mark. The character should kern well in both cases.}
\endsetslot
\setslot{quotedblbase}
   \Unicode{201E}{DOUBLE LOW-9 QUOTATION MARK}
\endsetslot
\setslot{guillemotleft}
   \Unicode{00AB}{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}
   \comment{In French typography this is an opening quotation mark,
      but in German typography it is a closing quotation mark. The
      character should kern well in both cases. The glyph name is
      misspelt (it should be \texttt{guillemetleft}), but this
      incorrect name is \emph{de facto} the glyph name used.}
\endsetslot
\setslot{guillemotright}
   \Unicode{00BB}{RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK}
   \comment{In French typography this is a closing quotation mark,
      but in German typography it is an opening quotation mark. The
      character should kern well in both cases. The glyph name is
      misspelt (it should be \texttt{guillemetright}), but this
      incorrect name is \emph{de facto} the glyph name used.}
\endsetslot
\setslot{endash}
   \Unicode{2013}{EN DASH}
   \label{endash}
   \Ligature{LIG}{hyphen}{emdash}
\endsetslot
\setslot{emdash}
   \Unicode{2014}{EM DASH}
   \comment{In a monowidth font this character is preferably given the
      width of two normal characters.}
\endsetslot
\nextslot{32}
\setslot{visiblespace}
   \Unicode{2423}{OPEN BOX}
   \comment{A visible space glyph `\textvisiblespace'.}
\endsetslot
\setslot{exclam}
   \Unicode{0021}{EXCLAMATION MARK}
   \Ligature{LIG}{quoteleft}{exclamdown}
\endsetslot
\setslot{quotedbl}
   \Unicode{0022}{QUOTATION MARK}
   \comment{The `neutral' double quotation mark, included for use in
      monowidth fonts, or for setting computer programs.}
\endsetslot
\setslot{numbersign}
   \Unicode{0023}{NUMBER SIGN}
\endsetslot
\setslot{dollar}
   \Unicode{0024}{DOLLAR SIGN}
\endsetslot
\setslot{percent}
   \Unicode{0025}{PERCENT SIGN}
\endsetslot
\setslot{ampersand}
   \Unicode{0026}{AMPERSAND}
\endsetslot
\setslot{quoteright}
   \Unicode{2019}{RIGHT SINGLE QUOTATION MARK}
   \Ligature{LIG}{quoteright}{quotedblright}
\endsetslot
\setslot{parenleft}
   \Unicode{0028}{LEFT PARENTHESIS}
\endsetslot
\setslot{parenright}
   \Unicode{0029}{RIGHT PARENTHESIS}
\endsetslot
\setslot{asterisk}
   \Unicode{002A}{ASTERISK}
\endsetslot
\setslot{plus}
   \Unicode{002B}{PLUS SIGN}
\endsetslot
\setslot{comma}
   \Unicode{002C}{COMMA}
   \Ligature{LIG}{comma}{quotedblbase}
\endsetslot
\setslot{hyphen}
   \Unicode{002D}{HYPHEN-MINUS}
   \Ligature{LIG}{hyphen}{endash}
   \Ligature{LIG}{hyphenchar}{hyphenchar}
   \comment{See also slot \ref{hyphenchar}.}
   \label{hyphen}
\endsetslot
\setslot{period}
   \Unicode{002E}{FULL STOP}
\endsetslot
\setslot{slash}
   \Unicode{002F}{SOLIDUS}
\endsetslot
\skipslots{10}
\setslot{colon}
   \Unicode{003A}{COLON}
\endsetslot
\setslot{semicolon}
   \Unicode{003B}{SEMICOLON}
\endsetslot
\setslot{less}
   \Unicode{003C}{LESS-THAN SIGN}
   \Ligature{LIG}{less}{guillemotleft}
\endsetslot
% Rest of the symbols and punctuation, then the miscellaneous slots and
% the opening of the section on ligature-only characters.
\setslot{equal}
   \Unicode{003D}{EQUALS SIGN}
\endsetslot
\setslot{greater}
   \Unicode{003E}{GREATER-THAN SIGN}
   \Ligature{LIG}{greater}{guillemotright}
\endsetslot
\setslot{question}
   \Unicode{003F}{QUESTION MARK}
   \Ligature{LIG}{quoteleft}{questiondown}
\endsetslot
\setslot{at}
   \Unicode{0040}{COMMERCIAL AT}
\endsetslot
\nextslot{91}
\setslot{bracketleft}
   \Unicode{005B}{LEFT SQUARE BRACKET}
\endsetslot
\setslot{backslash}
   \Unicode{005C}{REVERSE SOLIDUS}
\endsetslot
\setslot{bracketright}
   \Unicode{005D}{RIGHT SQUARE BRACKET}
\endsetslot
\setslot{asciicircum}
   \Unicode{005E}{CIRCUMFLEX ACCENT}
   \comment{The ASCII upward-pointing arrow head `\textasciicircum'.
      This is included for compatibility with typewriter fonts used
      for computer listings.}
\endsetslot
\setslot{underscore}
   \Unicode{005F}{LOW LINE}
   \comment{The ASCII underline character `\textunderscore', usually
      set on the baseline. This is included for compatibility with
      typewriter fonts used for computer listings.}
\endsetslot
\setslot{quoteleft}
   \Unicode{2018}{LEFT SINGLE QUOTATION MARK}
   \Ligature{LIG}{quoteleft}{quotedblleft}
\endsetslot
\nextslot{123}
\setslot{braceleft}
   \Unicode{007B}{LEFT CURLY BRACKET}
\endsetslot
\setslot{bar}
   \Unicode{007C}{VERTICAL LINE}
   \comment{The ASCII vertical bar `\textbar'. This is included for
      compatibility with typewriter fonts used for computer listings.}
\endsetslot
\setslot{braceright}
   \Unicode{007D}{RIGHT CURLY BRACKET}
\endsetslot
\setslot{asciitilde}
   \Unicode{007E}{TILDE}
   \comment{The ASCII tilde `\textasciitilde'. This is included for
      compatibility with typewriter fonts used for computer listings.}
\endsetslot
\nextslot{159}
\setslot{section}
   \Unicode{00A7}{SECTION SIGN}
\endsetslot
\nextslot{189}
\setslot{exclamdown}
   \Unicode{00A1}{INVERTED EXCLAMATION MARK}
\endsetslot
\setslot{questiondown}
   \Unicode{00BF}{INVERTED QUESTION MARK}
\endsetslot
\setslot{sterling}
   \Unicode{00A3}{POUND SIGN}
\endsetslot
\begincomment
\subsection{Miscellanea}
\endcomment
\nextslot{23}
\setslot{compwordmark}
   \Unicode{200C}{ZERO WIDTH NON-JOINER}
   \comment{An invisible glyph, with zero width and depth, but the
      height of lower case letters without ascenders. It is used to
      stop ligaturing in words like `shelf{}ful'. It is possible that
      this character should be considered a letter (and that
      hyphenation patterns should be designed to include it), but
      since its |\lccode| is 0, \TeX's hyphenation mechanism considers
      it to be a nonletter which ends the hyphenatable part of a word.}
\endsetslot
\setslot{perthousandzero}
   \comment{A glyph which is placed after `\%' to produce a
      `per-thousand', or twice to produce `per-ten-thousand'. Your
      guess is as good as mine as to what this glyph should look like
      in a monowidth font. Since most fonts don't include this glyph
      anyway, its use is discouraged.}
\endsetslot
\nextslot{127}
\setslot{hyphenchar}
   \label{hyphenchar}
   \comment{This character is provided as a discretionary hyphen
      symbol and may only appear at the end of a line, since it is
      often hanging out of the character box, but apart from that it
      is almost always the same as the \texttt{hyphen} character.
      Since \LaTeX\ sets the |\hyphenchar| of \texttt{T1} fonts to
      slot \ref{hyphen} rather than this slot, this feature is usually
      not used.}
\endsetslot
\begincomment
\section{Usual characters}
The following characters are only accessed through ligatures and can
therefore be replaced by others or completely left out if the ligature
programs are modified accordingly.
\endcomment
% Slots that are reached only through ligatures: the f-ligatures in
% slots 27--31 and the Dutch IJ/ij digraphs in slots 156 and 188.
\nextslot{27}
\setslot{ff}
   \charseq{
      \Unicode{0066}{LATIN SMALL LETTER F}
      \Unicode{0066}{LATIN SMALL LETTER F}
   }
   \comment{This glyph should be two characters wide in a monowidth
      font.}
   \ligature{LIG}{i}{ffi}
   \ligature{LIG}{l}{ffl}
\endsetslot
\setslot{fi}
   \charseq{
      \Unicode{0066}{LATIN SMALL LETTER F}
      \Unicode{0069}{LATIN SMALL LETTER I}
   }
   \comment{This glyph should be two characters wide in a monowidth
      font.}
\endsetslot
\setslot{fl}
   \charseq{
      \Unicode{0066}{LATIN SMALL LETTER F}
      \Unicode{006C}{LATIN SMALL LETTER L}
   }
   \comment{This glyph should be two characters wide in a monowidth
      font.}
\endsetslot
\setslot{ffi}
   \charseq{
      \Unicode{0066}{LATIN SMALL LETTER F}
      \Unicode{0066}{LATIN SMALL LETTER F}
      \Unicode{0069}{LATIN SMALL LETTER I}
   }
   \comment{This glyph should be three characters wide in a monowidth
      font.}
\endsetslot
\setslot{ffl}
   \charseq{
      \Unicode{0066}{LATIN SMALL LETTER F}
      \Unicode{0066}{LATIN SMALL LETTER F}
      \Unicode{006C}{LATIN SMALL LETTER L}
   }
   \comment{This glyph should be three characters wide in a monowidth
      font.}
\endsetslot
\nextslot{156}
\setslot{IJ}
   \Unicode{0132}{LATIN CAPITAL LIGATURE IJ}
   \comment{This is a single letter, and in a monowidth font should
      ideally be one letter wide.}
\endsetslot
\nextslot{188}
\setslot{ij}
   \Unicode{0133}{LATIN SMALL LIGATURE IJ}
   \comment{This is a single letter, and in a monowidth font should
      ideally be one letter wide.}
\endsetslot
\begincomment
\section{Fontdimens}
\endcomment
% Keep every \setfontdimen on a source line of its own: the trailing
% % comment runs to the end of the line and would otherwise swallow
% whatever follows it.
\setfontdimen{1}{italicslant}    % italic slant
\setfontdimen{2}{interword}      % interword space
\setfontdimen{3}{stretchword}    % interword stretch
\setfontdimen{4}{shrinkword}     % interword shrink
\setfontdimen{5}{xheight}        % x-height
\setfontdimen{6}{quad}           % quad
\setfontdimen{7}{extraspace}     % extra space after .
\setfontdimen{8}{capheight}      % cap height
\setfontdimen{9}{ascender}       % ascender
\setfontdimen{10}{acccapheight}  % accented cap height
\setfontdimen{11}{descender}     % descender's depth
\setfontdimen{12}{maxheight}     % max height
\setfontdimen{13}{maxdepth}      % max depth
\setfontdimen{14}{digitwidth}    % digit width
\setfontdimen{15}{verticalstem}  % dominant width of vertical stems
\setfontdimen{16}{baselineskip}  % baselineskip
\begincomment
\section{Coding scheme}
\endcomment
\setstr{codingscheme}{EXTENDED TEX FONT ENCODING - LATIN}
\endencoding
\section{Discussion}
As the \texttt{T1} encoding is an established standard, the discussion
of it has already been published elsewhere. [Unfortunately, I do not
know where as it all happened long before I started using \TeX. This
part of the document definitely needs to be improved. To begin with, I
would like to know if the TUGboat articles whose bibliography entries
appear in Figure~\ref{Bib-entries} are relevant for the matter.
\begin{figure*}
\small
\begin{verbatim}
@Article{Beebe:TB11-2-171,
  author =       "Nelson Beebe",
  title =        "{Character set encoding}",
  journal =      j-TUGboat,
  volume =       "11",
  number =       "2",
  pages =        "171--175",
  month =        jun,
  year =         "1990",
  ISSN =         "0896-3207",
  bibdate =      "Fri Jun 4 18:08:09 MDT 1999",
  bibsource =    "ftp://ftp.math.utah.edu/pub/tex/bib/tugboat.bib;
                 http://www.math.utah.edu/pub/tex/bib/index-table-t.html#tugboat",
  acknowledgement = ack-bnb # " and " # ack-nhfb,
}

@Article{Bien:TB11-2-175,
  author =       "Janusz S. Bie{\'n}",
  title =        "{On standards for computer modern font extensions}",
  journal =      j-TUGboat,
  volume =       "11",
  number =       "2",
  pages =        "175--183",
  month =        jun,
  year =         "1990",
  ISSN =         "0896-3207",
  bibdate =      "Fri Jun 4 18:08:09 MDT 1999",
  bibsource =    "ftp://ftp.math.utah.edu/pub/tex/bib/tugboat.bib;
                 http://www.math.utah.edu/pub/tex/bib/index-table-t.html#tugboat",
  acknowledgement = ack-bnb # " and " # ack-nhfb,
}

@Article{Ferguson:TB11-4-514,
  author =       "Michael Ferguson",
  title =        "{Report on multilingual activities}",
  journal =      j-TUGboat,
  volume =       "11",
  number =       "4",
  pages =        "514--516",
  month =        nov,
  year =         "1990",
  ISSN =         "0896-3207",
  bibdate =      "Fri Jun 4 18:08:09 MDT 1999",
  bibsource =    "ftp://ftp.math.utah.edu/pub/tex/bib/tugboat.bib;
                 http://www.math.utah.edu/pub/tex/bib/index-table-t.html#tugboat",
  acknowledgement = ack-bnb # " and " # ack-nhfb,
}
\end{verbatim}
\normalsize
\caption{Entries in \texttt{tugboat.bib} which may describe papers
  that contain (at least part of) the discussion regarding the
  \texttt{T1} encoding}
\label{Bib-entries}
\end{figure*}
Actually reading them would be nice too, as would of course getting
the references for whatever other papers there might be that
constituted the discussion of the \texttt{T1} encoding. The file
\texttt{cork.enc} in the \textit{Fontname} standard makes a cryptic
reference to ``TTN 1(4), December 1992'', whatever that might be.]
I can however answer questions on the more immediate matter of how the
interpretation of the established standard that this file expresses
was constructed. It is mainly the result of a comparison between the
file \texttt{t1.etx}~\cite{t1.etx}, the Unicode
standard~\cite{Unicode}, and the Adobe glyph list~\cite{AGL}.
(Some concerns have been expressed about the fact that the glyph name
listed for slot~0 is `\texttt{grave}', although that glyph name in
\cite{AGL} corresponds to \textunicode{0060}{GRAVE ACCENT}, whereas
\textunicode{0300}{COMBINING GRAVE ACCENT} is listed as corresponding
to `\texttt{gravecomb}'. This is however formally irrelevant, since the
glyph names are formally only internal labels in the encoding
specification. `\texttt{gravecomb}' would furthermore be a more
impractical label, since most fonts contain a `\texttt{grave}' glyph
that is used as a combining accent but no `\texttt{gravecomb}' glyph.)
\section{Changes}
The changes made to this specification since the original 2001/06/19
version are as follows.

2001/09/01: Corrected some typos (reported by Vladimir Volovich). /LH

2001/09/16: Added \cite{latin-page} to the bibliography. /LH

2001/09/21: Removed comment on \textunicode{2012}{FIGURE DASH} from
slot~\ref{endash}. \cite[Sec.~6.1]{Unicode} makes quite clear that
it's not to be used for ranges. /LH

2002/07/05: Added the Ferguson:TB11-4-514 and ``TTN~1(4)'' references
to the discussion section. Also added remark on \texttt{grave} versus
\texttt{gravecomb}. /LH

2004/11/28: Changed the f-ligatures to character sequences, which
makes more sense for \texttt{CMap}s. Added equivalent for
slot~\ref{SS}. /LH

\begin{thebibliography}{9}
\bibitem{AGL}
  Adobe Systems Incorporated:
  \textit{Adobe Glyph List}, 1998;
  \textsc{http}:/\slash \texttt{partners.adobe.com}\slash
  \texttt{asn}\slash \texttt{developer}\slash \texttt{type}\slash
  \texttt{glyphlist.txt}
\bibitem{latin-page}
  Roman Czyborra:
  \textit{The ISO 8859 Alphabet Soup},
  web page at \textsc{http}:/\slash \texttt{czyborra.com}\slash
  \texttt{charsets}\slash \texttt{iso8859.html}, 1998.
\bibitem{ISO8859-1}
  International Organization for Standardization:
  \textit{ISO/IEC standard 8859-1} (Information technology -- 8-bit
  single-byte coded graphic character sets -- Part 1: Latin alphabet
  No.~1). See also~\cite{latin-page}.
\bibitem{t1.etx}
  Alan Jeffrey, Sebastian Rahtz, and Ulrik Vieth:
  \textit{The \TeX\ extended text encoding vector}, version~1.801, 1998,
  \textsc{ctan}:\discretionary{}{}{\thinspace}\texttt{fonts}\slash
  \texttt{utilities}\slash \texttt{fontinst}\slash \texttt{inputs}\slash
  \texttt{etx}\slash \texttt{t1.etx}.
\bibitem{TeXbook}
  Donald E.\ Knuth, Duane Bibby (illustrations):
  \textit{The \TeX book}, Ad\-di\-son--Wes\-ley, 1986;
  ISBN~0-201-13447-0.
\bibitem{Unicode}
  The Unicode Consortium (editor), \emph{et al.}:
  \textit{The Unicode Standard, Version 3.0},
  Addison Wesley Longman Publisher, 2000; ISBN~0-201-61633-5.
  Most of the information in this book is also available online at the
  Unicode consortium website, at
  \textsc{http}:/\slash \texttt{www.unicode.org}\slash
  \texttt{unicode}\slash \texttt{uni2book}\slash \texttt{u2.html}.
\end{thebibliography}
\end{document}