1 | <?php |
---|
2 | |
---|
3 | /** |
---|
4 | * Import data from a UseModWiki into a MediaWiki wiki |
---|
5 | * 2003-02-09 Brion VIBBER <brion@pobox.com> |
---|
6 | * Based loosely on Magnus's code from 2001-2002 |
---|
7 | * |
---|
8 | * Updated limited version to get something working temporarily |
---|
9 | * 2003-10-09 |
---|
10 | * Be sure to run the link & index rebuilding scripts! |
---|
11 | * |
---|
12 | * Some more munging for charsets etc |
---|
13 | * 2003-11-28 |
---|
14 | * |
---|
15 | * Partial fix for pages starting with lowercase letters (??) |
---|
16 | * and CamelCase and /Subpage link conversion |
---|
17 | * 2004-11-17 |
---|
18 | * |
---|
19 | * Rewrite output to create Special:Export format for import |
---|
20 | * instead of raw SQL. Should be 'future-proof' against future |
---|
21 | * schema changes. |
---|
22 | * 2005-03-14 |
---|
23 | * |
---|
24 | * @todo document |
---|
25 | * @addtogroup Maintenance |
---|
26 | */ |
---|
27 | |
---|
# Refuse to run via a web server: this script dumps XML to stdout and
# reads local files, so it is CLI-only.
if( php_sapi_name() != 'cli' ) {
	echo "Please customize the settings and run me from the command line.";
	die( -1 );
}

/** Set these correctly! */
$wgImportEncoding = "CP1252"; /* We convert all to UTF-8 */
$wgRootDirectory = "/kalman/Projects/wiki2002/wiki/lib-http/db/wiki";

/* On a large wiki, you might run out of memory */
@ini_set( 'memory_limit', '40M' );

/* globals */
# UseModWiki stores each page as one flat file whose fields are joined by
# this separator byte; FS1/FS2/FS3 mark successively nested field levels
# (page -> section -> text record), see splitHash()/fetchPage() below.
$wgFieldSeparator = "\xb3"; # Some wikis may use different char
$FS = $wgFieldSeparator ;
$FS1 = $FS."1" ;
$FS2 = $FS."2" ;
$FS3 = $FS."3" ;

# Unicode sanitization tools
require_once( '../includes/normal/UtfNormal.php' );

# Map of username => user id, consulted by checkUserCache(). Nothing in
# this script ever populates it, so every revision imports with user id 0.
$usercache = array();

# Entry point: stream the whole wiki as a Special:Export XML document.
importPages();
---|
53 | |
---|
54 | # ------------------------------------------------------------------------------ |
---|
55 | |
---|
/**
 * Walk every UseModWiki page bucket (A-Z plus "other") under the page
 * database root and emit one complete MediaWiki export XML document on
 * standard output.
 */
function importPages()
{
	global $wgRootDirectory;

	$close = '>';
	// The "?$close" dance keeps the XML declaration from ending this PHP block.
	echo <<<END
<?xml version="1.0" encoding="UTF-8" ?$close
<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.1/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.1/
http://www.mediawiki.org/xml/export-0.1.xsd"
version="0.1"
xml:lang="en">
<!-- generated by importUseModWiki.php -->

END;

	// UseModWiki shards pages into one directory per initial letter,
	// plus "other" for titles that do not start with A-Z.
	$buckets = array_merge( range( 'A', 'Z' ), array( 'other' ) );
	foreach( $buckets as $bucket ) {
		$path = "$wgRootDirectory/page/$bucket";
		if( is_dir( $path ) ) {
			importPageDirectory( $path );
		}
	}

	echo <<<END
</mediawiki>

END;
}
---|
86 | |
---|
/**
 * Recursively import every "*.db" page file in a UseModWiki page
 * directory, echoing the generated XML for each page.
 *
 * @param string $dir    Directory to scan.
 * @param string $prefix Title prefix for subpages ("Parent/"), empty at top level.
 */
function importPageDirectory( $dir, $prefix = "" )
{
	echo "\n<!-- Checking page directory " . xmlCommentSafe( $dir ) . " -->\n";
	$mydir = opendir( $dir );
	if( $mydir === false ) {
		// Unreadable directory: report it in the output stream and move on.
		echo "<!-- Could not open directory " . xmlCommentSafe( $dir ) . " -->\n";
		return;
	}
	// readdir() returns false at end-of-directory; compare strictly so an
	// entry literally named "0" (which is falsy) does not end the loop early.
	while( ( $entry = readdir( $mydir ) ) !== false ) {
		$m = array();
		if( preg_match( '/^(.+)\.db$/', $entry, $m ) ) {
			echo importPage( $prefix . $m[1] );
		} else {
			if( is_dir( "$dir/$entry" ) ) {
				if( $entry != '.' && $entry != '..' ) {
					// Subdirectories hold subpages; their titles are "entry/page".
					importPageDirectory( "$dir/$entry", "$entry/" );
				}
			} else {
				echo "<!-- File '" . xmlCommentSafe( $entry ) . "' doesn't seem to contain an article. Skipping. -->\n";
			}
		}
	}
	closedir( $mydir ); // Don't leak the directory handle on deep recursion.
}
---|
106 | |
---|
107 | |
---|
108 | # ------------------------------------------------------------------------------ |
---|
109 | |
---|
110 | /* fetch_ functions |
---|
111 | Grab a given item from the database |
---|
112 | */ |
---|
113 | |
---|
/**
 * Map a page title to its UseModWiki storage bucket: "X/Title" where X is
 * the uppercased first letter, or "other/Title" for non-alphabetic starts.
 */
function useModFilename( $title ) {
	$initial = substr( $title, 0, 1 );
	return preg_match( '/[A-Z]/i', $initial )
		? strtoupper( $initial ) . "/$title"
		: "other/$title";
}
---|
121 | |
---|
/**
 * Load the current revision of a page from its UseModWiki ".db" file and
 * return it as an object with text, summary, minor, ts, username and host
 * fields. Dies if the page file does not exist.
 */
function fetchPage( $title )
{
	global $FS1, $FS2, $FS3, $wgRootDirectory;

	$filename = $wgRootDirectory . "/page/" . useModFilename( $title ) . ".db";
	if( !file_exists( $filename ) ) {
		echo "Couldn't open file '$filename' for page '$title'.\n";
		die( -1 );
	}

	// Unwrap the three nesting levels: page record -> default text
	// section -> text fields.
	$page    = splitHash( $FS1, file_get_contents( $filename ) );
	$section = splitHash( $FS2, $page["text_default"] );
	$text    = splitHash( $FS3, $section["data"] );

	return array2object( array(
		"text"     => $text["text"],
		"summary"  => $text["summary"],
		"minor"    => $text["minor"],
		"ts"       => $section["ts"],
		"username" => $section["username"],
		"host"     => $section["host"],
	) );
}
---|
140 | |
---|
/**
 * Load the archived ("kept") old revisions of a page from its ".kp" file.
 * Returns an array of revision objects shaped like fetchPage()'s result;
 * returns an empty array when the page has no kept revisions. Records
 * missing text, a minor flag, or a positive timestamp are skipped with an
 * XML comment.
 */
function fetchKeptPages( $title )
{
	global $FS1, $FS2, $FS3, $wgRootDirectory;

	$filename = $wgRootDirectory . "/keep/" . useModFilename( $title ) . ".kp";
	if( !file_exists( $filename ) ) {
		return array();
	}

	$keptlist = explode( $FS1, file_get_contents( $filename ) );
	array_shift( $keptlist ); # Drop the junk at beginning of file

	$revisions = array();
	foreach( $keptlist as $record ) {
		$section = splitHash( $FS2, $record );
		$text = splitHash( $FS3, $section["data"] );
		// Corrupt record: no text, no minor-flag field, or a non-positive
		// timestamp. Note it and move on.
		if ( !$text["text"] || $text["minor"] == "" || ( $section["ts"]*1 <= 0 ) ) {
			echo "<!-- skipped a bad old revision -->\n";
			continue;
		}
		$revisions[] = array2object( array(
			"text"     => $text["text"],
			"summary"  => $text["summary"],
			"minor"    => $text["minor"],
			"ts"       => $section["ts"],
			"username" => $section["username"],
			"host"     => $section["host"],
		) );
	}
	return $revisions;
}
---|
165 | |
---|
/**
 * Explode a UseModWiki field-separated record into an associative array:
 * the flat list alternates key, value, key, value... A trailing unpaired
 * element is discarded.
 */
function splitHash( $sep, $str ) {
	$parts = explode( $sep, $str );
	$map = array();
	$limit = count( $parts );
	for( $i = 0; $i + 1 < $limit; $i += 2 ) {
		$map[$parts[$i]] = $parts[$i + 1];
	}
	return $map;
}
---|
174 | |
---|
175 | |
---|
176 | /* import_ functions |
---|
177 | Take a fetched item and produce SQL |
---|
178 | */ |
---|
179 | |
---|
/**
 * Resolve a UseModWiki revision author to a (user id, display name) pair.
 *
 * @param string $name Stored username; may be empty for anonymous edits.
 * @param string $host Client host, used as the display name when $name is empty.
 * @return array [int user id (0 if unknown), string display name]
 */
function checkUserCache( $name, $host )
{
	global $usercache;

	if( $name ) {
		// $usercache maps username => user id, so look the name up as a
		// *key*. The previous in_array() call searched the values instead,
		// meaning the cache could never produce a hit.
		if( array_key_exists( $name, $usercache ) ) {
			$userid = $usercache[$name];
		} else {
			# If we haven't imported user accounts
			$userid = 0;
		}
		// UseModWiki stores spaces as underscores; undo that for display.
		$username = str_replace( '_', ' ', $name );
	} else {
		$userid = 0;
		$username = $host;
	}
	return array( $userid, $username );
}
---|
198 | |
---|
/**
 * Convert one UseModWiki page (current revision plus kept history) into a
 * <page> element of MediaWiki Special:Export XML and return it as a string.
 * Also echoes a progress comment directly to stdout.
 */
function importPage( $title )
{
	global $usercache;

	echo "\n<!-- Importing page " . xmlCommentSafe( $title ) . " -->\n";
	$page = fetchPage( $title );

	# Underscores become spaces in display titles.
	$newtitle = xmlsafe( str_replace( '_', ' ', recodeText( $title ) ) );

	# Convert CamelCase / subpage links to [[bracketed]] form.
	$munged = mungeFormat( $page->text );
	if( $munged != $page->text ) {
		/**
		 * Save a *new* revision with the conversion, and put the
		 * previous last version into the history.
		 */
		$next = array2object( array(
			'text' => $munged,
			'minor' => 1,
			'username' => 'Conversion script',
			'host' => '127.0.0.1',
			'ts' => time(),
			'summary' => 'link fix',
			) );
		$revisions = array( $page, $next );
	} else {
		/**
		 * Current revision:
		 */
		$revisions = array( $page );
	}
	$xml = <<<END
<page>
<title>$newtitle</title>

END;

	# History
	$revisions = array_merge( $revisions, fetchKeptPages( $title ) );
	if(count( $revisions ) == 0 ) {
		return NULL; // Was "$sql", which does not appear to be defined.
	}

	# Emit one <revision> element per revision, oldest data re-encoded to
	# UTF-8 and XML-escaped.
	foreach( $revisions as $rev ) {
		$text = xmlsafe( recodeText( $rev->text ) );
		$minor = ($rev->minor ? '<minor/>' : '');
		# Only the display name is used; the user id lookup is discarded.
		list( /* $userid */ , $username ) = checkUserCache( $rev->username, $rev->host );
		$username = xmlsafe( recodeText( $username ) );
		$timestamp = xmlsafe( timestamp2ISO8601( $rev->ts ) );
		$comment = xmlsafe( recodeText( $rev->summary ) );

		$xml .= <<<END
<revision>
<timestamp>$timestamp</timestamp>
<contributor><username>$username</username></contributor>
$minor
<comment>$comment</comment>
<text>$text</text>
</revision>

END;
	}
	$xml .= "</page>\n\n";
	return $xml;
}
---|
263 | |
---|
264 | # Whee! |
---|
/**
 * Re-encode legacy wiki text as UTF-8: normalize CRLF line endings,
 * convert from $wgImportEncoding, then expand old numeric character
 * entities (&#1234; style) into raw UTF-8 bytes.
 */
function recodeText( $string ) {
	global $wgImportEncoding;
	# For currently latin-1 wikis
	$normalized = str_replace( "\r\n", "\n", $string );
	$utf8 = @iconv( $wgImportEncoding, "UTF-8", $normalized );
	# Any old &#1234; stuff
	return wfMungeToUtf8( $utf8 );
}
---|
273 | |
---|
/**
 * Encode a Unicode code point as a UTF-8 byte sequence (RFC 3629).
 * Values beyond U+10FFFF are not valid Unicode and are returned as a
 * numeric character entity instead.
 *
 * @param int $codepoint Unicode code point.
 * @return string UTF-8 bytes, or "&#N;" for out-of-range input.
 */
function wfUtf8Sequence($codepoint) {
	if($codepoint < 0x80) return chr($codepoint);
	if($codepoint < 0x800) return chr($codepoint >> 6 & 0x3f | 0xc0) .
		chr($codepoint & 0x3f | 0x80);
	if($codepoint < 0x10000) return chr($codepoint >> 12 & 0x0f | 0xe0) .
		chr($codepoint >> 6 & 0x3f | 0x80) .
		chr($codepoint & 0x3f | 0x80);
	# Four-byte sequences cover the supplementary planes up to U+10FFFF.
	# The previous bound of 0x100000 wrongly excluded plane 16
	# (U+100000..U+10FFFF).
	if($codepoint < 0x110000) return chr($codepoint >> 18 & 0x07 | 0xf0) .
		chr($codepoint >> 12 & 0x3f | 0x80) .
		chr($codepoint >> 6 & 0x3f | 0x80) .
		chr($codepoint & 0x3f | 0x80);
	# Not a legal Unicode code point; pass it through as an entity.
	return "&#$codepoint;";
}
---|
288 | |
---|
/**
 * Replace decimal (&#1234;) and hexadecimal (&#xABC;) numeric character
 * references in $string with their raw UTF-8 encodings.
 *
 * Uses preg_replace_callback(): the original /e (eval) pattern modifier
 * was deprecated in PHP 5.5 and removed in PHP 7, so the old code fails
 * outright on any modern PHP.
 */
function wfMungeToUtf8($string) {
	$string = preg_replace_callback( '/&#([0-9]+);/',
		function( $m ) { return wfUtf8Sequence( (int)$m[1] ); },
		$string );
	$string = preg_replace_callback( '/&#x([0-9a-f]+);/i',
		function( $m ) { return wfUtf8Sequence( hexdec( $m[1] ) ); },
		$string );
	# Should also do named entities here
	return $string;
}
---|
295 | |
---|
/**
 * Format a Unix timestamp as an ISO 8601 UTC string,
 * e.g. 2003-08-05T18:30:02Z.
 */
function timestamp2ISO8601( $ts ) {
	// Backslashes make T and Z literal characters in the format string.
	return gmdate( 'Y-m-d\TH:i:s\Z', $ts );
}
---|
300 | |
---|
/**
 * Make a string safe for inclusion in XML element content: strip invalid
 * UTF-8 sequences and forbidden control characters left over from old,
 * unnormalized page data, then escape XML metacharacters.
 */
function xmlsafe( $string ) {
	return htmlspecialchars( UtfNormal::cleanUp( $string ) );
}
---|
312 | |
---|
/**
 * Prepare arbitrary text for embedding inside an XML comment: re-encode
 * and escape it, then defang "--", which is illegal within comments.
 */
function xmlCommentSafe( $text ) {
	$escaped = xmlsafe( recodeText( $text ) );
	return str_replace( '--', '\\-\\-', $escaped );
}
---|
316 | |
---|
317 | |
---|
/**
 * Convert an associative array into a stdClass object with one property
 * per key. A direct cast does this exactly; the previous `(object)0` seed
 * left a stray ->scalar property (value 0) on every result.
 */
function array2object( $arr ) {
	return (object) $arr;
}
---|
325 | |
---|
326 | |
---|
327 | /** |
---|
328 | * Make CamelCase and /Talk links work |
---|
329 | */ |
---|
/**
 * Make CamelCase and /Talk links work: wrap UseModWiki-style implicit
 * links in [[...]] while leaving <nowiki> spans, URLs, and existing
 * [[links]] untouched (they are stashed behind placeholders during the
 * conversion and restored afterwards).
 */
function mungeFormat( $text ) {
	global $nowiki;
	$nowiki = array();
	// Stash spans that must not be link-converted.
	$staged = preg_replace_callback(
		'/(<nowiki>.*?<\\/nowiki>|(?:http|https|ftp):\\S+|\[\[[^]\\n]+]])/s',
		'nowikiPlaceholder', $text );

	# This is probably not 100% correct, I'm just
	# glancing at the UseModWiki code.
	$upper = "[A-Z]";
	$lower = "[a-z_0-9]";
	$any = "[A-Za-z_0-9]";
	$camel = "(?:$upper+$lower+$upper+$any*)";
	$subpage = "(?:\\/$any+)";
	$substart = "(?:\\/$upper$any*)";

	$munged = preg_replace( "/(?!\\[\\[)($camel$subpage*|$substart$subpage*)\\b(?!\\]\\]|>)/",
		'[[$1]]', $staged );

	// Restore the stashed spans in order. The original used the /e (eval)
	// modifier, which was removed in PHP 7; a callback is the supported
	// equivalent.
	$final = preg_replace_callback( '/' . preg_quote( placeholder(), '/' ) . '/s',
		function( $m ) {
			global $nowiki;
			return array_shift( $nowiki );
		}, $munged );
	return $final;
}
---|
353 | |
---|
354 | |
---|
# Sentinel token used by mungeFormat() to stand in for stashed spans.
# NOTE(review): single quotes mean this returns the literal 19-character
# text "\xffplaceholder\xff" (backslash, x, f, f) -- NOT 0xFF bytes.
# Possibly unintended, but both the stash and the restore use this same
# function, so it is self-consistent; confirm before "fixing".
function placeholder( $x = null ) {
	return '\xffplaceholder\xff';
}
---|
358 | |
---|
/**
 * preg_replace_callback() hook: queue the matched span on the global
 * $nowiki list so mungeFormat() can restore it later, and substitute the
 * placeholder token in its place.
 */
function nowikiPlaceholder( $matches ) {
	global $nowiki;
	array_push( $nowiki, $matches[1] );
	return placeholder();
}
---|
364 | |
---|
365 | ?> |
---|