Benutzer:Kolossos/SQL-Oberstübchen

aus Wikipedia, der freien Enzyklopädie
Zur Navigation springen Zur Suche springen

Georeferenzierung[Bearbeiten | Quelltext bearbeiten]

  • wp-world/load-dispenser.php
  • Aufräumen:
DELETE FROM wp_coords_new0 WHERE lat = 0 AND lon = 0;
DELETE FROM wp_coords_new0 WHERE "Titel" like 'List of %';
DELETE FROM wp_coords_new0 WHERE "T_de" LIKE '%Oberliga%';
DELETE FROM wp_coords_new0 WHERE "T_de" LIKE '%Regionalliga%';
DELETE FROM wp_coords_new0 WHERE "T_de" LIKE '%German_Football_League%';

  • Sortieren: nach psize absteigend
ALTER TABLE `pub_CSV_test3_neu` ORDER BY `psize` DESC;
  • Kopieren nach pub_C_geo_id_neu
ALTER TABLE `pub_C_geo_id_neu`  ADD `ID` INT(9) NOT NULL AUTO_INCREMENT PRIMARY KEY FIRST;
  • Wissen was los ist
rmytop -h sql-s1 -h sql-s2 -h sql-s3 -u kolossos
KILL ID
  • Statistik

SELECT lang, COUNT(*) FROM `pub_CSV-test` GROUP BY lang SELECT style, COUNT(*)FROM `pub_CSV_test3` GROUP BY style ORDER BY `COUNT( * )` DESC

  • Duplikate entfernen
delete test from test,
(select count(*) as counter,
 min(page) as page,
 test2
 from test
 group by test2
 having counter > 1) as doppel
 where test.test2 = doppel.test2 and
 test.page != doppel.page
  • Komplettes laden des Errorfiles

LOAD DATA LOCAL INFILE '/home/sk/data/geo/de/coordinates_de_sort.txt' IGNORE INTO TABLE `pub_CSV_error1` FIELDS TERMINATED BY '\t' OPTIONALLY ENCLOSED BY '"' ESCAPED BY '\\' LINES TERMINATED BY '\n'( `t` ,`lang` , `Titel` , `error_nr` , `message` , `coor_bad` , `coor_nr` , `templates` , `t` , `t` ,`lat`,`t` , `t` , `t` , `t` ,`lon`,`t` , `t` , `t` , `t`,`t` ,`type`,`pop`,`t`,`style`,`t`,`Country`,`Subregion`,`t`,`t`,`t`,`t`,`t`,`t`,`t`,`image`,`psize`,`Category`,`Titel_en`,`Titel_de`,`Titel_fr`,`Titel_pl`,`Titel_ja`,`Titel_nl`,`Titel_it`,`Titel_pt`,`Titel_sv`,`Titel_es`,`Titel_ru` )

  • Map1
SELECT ROUND(`Lon`,0),ROUND(`Lat`,0),  COUNT(*)  FROM `geo_de_old` GROUP BY ROUND(`Lon`,0),ROUND(`Lat`,0)
  • Maybe
LOAD DATA LOCAL INFILE '/home/sk/data/geo/de/de_output_01_coordinates_maybe.txt' IGNORE INTO TABLE `pub_maybe` FIELDS TERMINATED BY '\t'     OPTIONALLY ENCLOSED BY '"' ESCAPED BY '\\' LINES TERMINATED BY '\n'(
`lang` , `title` , `t` , `t` ,`t` , `t` ,  `t` , `t` ,`Cat`) 
  • Kopieren eines DB-Ausschnittes:
INSERT INTO `u_kolossos`.`pub_de-wiki`
SELECT `page_title`,`page_len`
FROM `dewiki_p`.`page` WHERE `page_namespace` =0
  • ALTER TABLE `pub_CSV_test3_si`
  CHARACTER SET 'utf8'
  • Maybe-Checker

Button Statistik 27.01.07 2 380 3 2113 4 427

SELECT ready, COUNT(*) FROM `pub_maybe` GROUP BY ready

  • Vergleich alter und neuer DB
Select x.lang,x."Titel" FROM wp_coords_new0 x WHERE x."Titel" NOT IN (Select "Titel" from wp_coords_red0)  LIMIT 2

SQL-Allgemein[Bearbeiten | Quelltext bearbeiten]

  • Dump über die Konsole

mysqldump u_kolossos_geo_p pub_C_geo_id > pub_C_geo_id-dispens.sql

  • einspielen:
mysql -h fiwiki-p.db.toolserver.org -u kolossos -p
 use u_kolossos
 source /home/kolossos/public_html/wp-world/einspielen/pub_C_geo_id

CSV-Dump:

sql u_kolossos_geo_p </home/kolossos/public_html/wp-world/utf8/NeueCSVDatei.sql > /home/kolossos/public_html/wp-world/utf8/geo_id_utf.txt
  • Tabellen abgleichen mit Unterabfragen

SELECT * FROM `pub_maybe` WHERE title NOT IN ( SELECT Titel FROM `pub_CSV_test3`) AND `ready` =3 LIMIT 0 , 30

  • Deutsche Artikel ohne de Koordinaten

SELECT * FROM `pub_CSV_test3` WHERE `lang` NOT LIKE 'de' AND `Titel_de` IS NOT NULL ORDER BY `psize` DESC

  • Suche nach underlines
SELECT * FROM `pub_CSV_test3` WHERE `Titel_en` LIKE '%\_%' LIMIT 0 , 30
  • Interwikilinken
SELECT "da" lang, `ll_from` fom,
(SELECT `page_title` FROM `page` WHERE `page_id`=fom) Title,
(SELECT `page_len` FROM `page` WHERE `page_id`=fom) psizze,
ll_lang, lat,lon,type,pop,Height,Country,Subregion,Scale,dim,psize ,style,image,imagejpg,name 
FROM `langlinks` a,u_kolossos.`pub_CSV_test3` k WHERE  
a.ll_title=k.Titel AND a.ll_lang=k.lang Limit 100

Thumbs[Bearbeiten | Quelltext bearbeiten]

http://tools.wikimedia.de/~kolossos/wp-world/thumbs-wget2.php?so=co&path=/mnt/usb/

Templatetiger[Bearbeiten | Quelltext bearbeiten]

  • qcronsub -l h_rt=180:30:00 -l virtual_free=800M "$HOME/public_html/templatetiger/einspielen.sh"
    • qstat

*sql u_kolossos_tt_p < /home/kolossos/public_html/templatetiger/einspielen.sql

SELECT count( * ) , `tp_name` FROM `pub_tt1` GROUP BY `tp_name` ORDER BY count( * ) DESC LIMIT 0 , 30

  • Einspielen

mysql> use u_kolossos mysql> LOAD DATA LOCAL INFILE '/home/sk/data/templatetiger/dewiki/dewiki_templatetiger.txt' IGNORE INTO TABLE `pub_tt1_dewiki2` FIELDS TERMINATED BY '\t' OPTIONALLY ENCLOSED BY '"' ESCAPED BY '\\' LINES TERMINATED BY '\n';

  • Summen-Tabelle füllen

mysql> INSERT INTO `u_kolossos`.`pub_tt1_ru_sum_neu` SELECT `tp_name` , COUNT( * ) FROM `pub_tt1_ru_neu` WHERE `entry_nr` = '0' GROUP BY `tp_name` ORDER BY COUNT( * ) DESC;

  • INSERT INTO `u_kolossos_yarrow`.`pub_tt1_de_para` SELECT `tp_name` , `entry_name`, COUNT( * ) FROM `pub_tt1_de` GROUP BY `tp_name`,`entry_name`;

Personendaten[Bearbeiten | Quelltext bearbeiten]

INSERT INTO `u_kolossos`.`pub_personendaten_de`
SELECT DISTINCT `name` a, 
(SELECT `Value` FROM `pub_tt1_de` WHERE `entry_name` LIKE 'ALTERNATIVNAMEN' AND `tp_name` LIKE 'Personendaten' AND `name` = a LIMIT 1	) ALTERNATIVNAMEN,
(SELECT `Value` FROM `pub_tt1_de` WHERE `entry_name` LIKE 'KURZBESCHREIBUNG' AND `tp_name` LIKE 'Personendaten' AND `name` = a LIMIT 1) KURZBESCHREIBUNG,
(SELECT `Value` FROM `pub_tt1_de` WHERE `entry_name` LIKE 'GEBURTSDATUM' AND `tp_name` LIKE 'Personendaten' AND `name` = a LIMIT 1) GEBURTSDATUM,
(SELECT `Value` FROM `pub_tt1_de` WHERE `entry_name` LIKE 'GEBURTSORT' AND `tp_name` LIKE 'Personendaten' AND `name` = a LIMIT 1	) GEBURTSORT,
(SELECT `Value`FROM `pub_tt1_de` WHERE `entry_name` LIKE 'STERBEDATUM' AND `tp_name` LIKE 'Personendaten' AND `name` = a LIMIT 1) STERBEDATUM,
(SELECT `Value` FROM `pub_tt1_de` WHERE `entry_name` LIKE 'STERBEORT' AND `tp_name` LIKE 'Personendaten'	AND `name` = a LIMIT 1) STERBEORT,
(SELECT `Value` FROM `pub_tt1_de` WHERE `tp_name` LIKE 'PND'	AND `name` = a LIMIT 1) PND
FROM `pub_tt1_de` WHERE `tp_name` LIKE 'Personendaten'

Wiki[Bearbeiten | Quelltext bearbeiten]

Text-Austausch
UPDATE `test`.`stringreplace` SET `Text` = REPLACE( `Text` , 'DM', 'Euro' )
UPDATE `pub_tt1_de` SET `tp_name` = REPLACE( `tp_name` , '#',  )  WHERE `tp_name` LIKE 'if:%'
NULL durch "" ersetzen

COALESCE(`Value`,"")

Tests[Bearbeiten | Quelltext bearbeiten]

  • Cat

Select `page_title` as pt,`page_id`as pid, (Select Count(*) FROM `categorylinks` WHERE `cl_to`=pt ) FROM `page` WHERE `page_namespace`=14 AND `page_title` like "%physik%"

Select `page_title` as pt,`page_id`,
(Select Count(*) FROM `categorylinks`,`page`
 WHERE `page`.`page_id`=`categorylinks`.`cl_from` AND 
`page`.`page_namespace` =14
AND `categorylinks`.`cl_to`=pt) FROM `page` WHERE 
`page_namespace`=14 AND `page_title` like "Physik%"

Hallo, vorgeraumer Zeit hatten wir ja fast zeitgleich das Cattree-Prinzip entwickelt. Die permanenten Pluszeichen vor den Kategorien, auch wenn diese keine weiteren Kategorien enthalten, hatte mich schon damals etwas gestört. Jetzt hat die Entwicklung ja in den Einzug ins Mediawiki gehalten, gerade in den tiefen Kategorien trifft man sehr häufig auf Kategorien, welche keine weiteren Kats. enthalten, wo man also umsonst klickt und umsonst Traffic erzeugt. Damals war dein mir einleuchtendes Argument das erzeugen von vielen SQL-Aufrufen. Heute bin ich der Sache nochmal auf die Spur gegangen, schließlich hat man in der Zwischenzeit vielleicht etwwas zugelernt, durch die Verwendung von Unterabfragen kann man recht Resourcen schonend sich an die Umsetzung machen.

  • umkreis-images

SELECT ROUND(1*`Lat`,0)/1 ,ROUND(1*`Lon`,0)/1, MAX(`psize`),`Titel`,`imagejpg` FROM `pub_CSV_test3` WHERE `imagejpg` NOT LIKE GROUP BY ROUND(1*`Lat`,0)/1,ROUND(1*`Lon`,0)/1 HAVING COUNT(*)>0 http://tools.wikimedia.de/~kolossos/wp-world/umkreis-images.php?la=de&submit=-&lon=0&lat=0&rang=27000&map=2

  • SELECT Round( uid /3 ) *3a, AVG( `pid` ) FROM `pages_old` GROUP BY a LIMIT 0 , 30
  • Monatliches Aktivität und Artikel Umfang eines Artikels:
SELECT Count(*),rev_text_id text_id,
(SELECT CHAR_LENGTH(`old_text`) FROM `text` WHERE `old_id` = text_id),Round(`rev_timestamp`/100000000) 
a FROM `revision` WHERE `rev_page`=(SELECT `page_id` 
FROM `page` Where `page_title` LIKE 'Hauptseite' LIMIT 1) Group by  a

Irrsinn[Bearbeiten | Quelltext bearbeiten]

  • num. Integration
$query = mysql_query("SET @v1 =-4;");
$sql="SELECT ID, 0.000002 * ID time,
(@ch1now := C1) C1, round( (@v1 := (@v1 + 0.21 * 0.02*@ch1now ) ) , 8) v1
FROM `id-a1`LIMIT 0 , 31500;";
$query = mysql_query($sql);

"Para"-noid[Bearbeiten | Quelltext bearbeiten]

CREATE TABLE kolossos3 SELECT Titel_en AS title, lat, lon FROM pub_CSV_test3 WHERE Titel_en IS NOT NULL;

ALTER TABLE kolossos3 ADD location POINT; ALTER TABLE kolossos3 MODIFY title VARCHAR(255) NOT NULL COLLATE latin1_bin;

UPDATE kolossos3 SET title=TRIM('_' FROM REPLACE(title,' ','_')), location=PointFromText(CONCAT( 'Point(', lon, ' ', lat, ')' ));

SELECT *,COUNT(*) FROM kolossos3 GROUP BY title HAVING COUNT(title)>1;

  1. delete one of each duplicate from the previous query by changing the following title field ... stupid manual solution

DELETE FROM kolossos3 WHERE title='xxx' LIMIT 1;

ALTER TABLE kolossos3 engine=myisam; ALTER TABLE kolossos3 ADD PRIMARY KEY (title); ALTER TABLE kolossos3 MODIFY location POINT NOT NULL; ALTER TABLE kolossos3 ADD SPATIAL KEY `location` (`location`);


Commons[Bearbeiten | Quelltext bearbeiten]

  • SELECT Count(*) C,`rc_user_text` FROM `recentchanges` group by rc_user order by C DESC
  • SELECT Count( * ) C, `rc_user_text` , ROUND( `rc_cur_time` /10000, 0 ) timer FROM `recentchanges` GROUP BY rc_user, timer ORDER BY C DESC

Bewertung[Bearbeiten | Quelltext bearbeiten]

SELECT avg(`rfp_ave_val`),count(`rfp_ave_val`),`rfp_page_id`,`page_title` FROM `reader_feedback_pages`,`page` where `rfp_page_id`=`page_id` GROUP BY`rfp_page_id`

Log-files[Bearbeiten | Quelltext bearbeiten]

http://wikistics.falsikon.de/dumps.htm

wget -> bunzip2 -> mysql use u_kolossos_wp_logs_p

LOAD DATA LOCAL INFILE '/home/kolossos/data/2009_181d.log' IGNORE INTO TABLE `2009_181d` FIELDS TERMINATED BY ' ' ESCAPED BY '\\' LINES TERMINATED BY '\n';



PHP-Errors

tail -f /var/log/userlog | grep $LOGNAME

Reg. Ausdrücke[Bearbeiten | Quelltext bearbeiten]

Suchen: \[http://maps\.google\..*ll=([^,]+),([^&]+)&spn=.*\]
Ersetzen: {{Coordinate|NS=\1|EW=\2|type=landmark|dim=100|region=RU-MOW}}

OSM[Bearbeiten | Quelltext bearbeiten]

Im Cache alles lösche was älter als 30 Tage ist: find /home/kolossos/public_html/osm/cache -type f -ctime +30 -exec rm \{\} \;

Exiftool[Bearbeiten | Quelltext bearbeiten]

exiftool -tagfromfile alt.jpg neu.jpg


PostGIS[Bearbeiten | Quelltext bearbeiten]

ssh -L 4711:osmdb.eqiad.wmnet:5432 kolossos@tools-login.wmflabs.org (2015)

Strings ersetzen: sed -i -- 's/\\0\\0//g' *
database layout
public | geometry_columns                          | table | gis         1116
public | planet_osm_line                           | table | gis     25937305
public | planet_osm_nodes                          | table | gis    387769508
public | planet_osm_point                          | table | gis      7492814
public | planet_osm_polygon                        | table | gis      3764502
public | planet_osm_rels                           | table | gis       143715
public | planet_osm_roads                          | table | gis      2373290
public | planet_osm_ways                           | table | gis     30435611
public | spatial_ref_sys                           | table | gis         3163
start
  • psql -h osmdb.eqiad.wmnet -d gis -U osm (2015)
  • psql -h sql-mapnik -d osm_mapnik
Kill queries
SELECT * FROM pg_stat_activity; (procpid)
SELECT pg_cancel_backend(pid int);


Postgis Datenbank erzeugen[Bearbeiten | Quelltext bearbeiten]

createdb -E UTF8 -O username datenbankname
createlang plpgsql datenbankname
psql -d datenbankname -f /usr/share/postgresql-8.3-postgis/lwpostgis.sql
psql -d datenbankname -f /usr/share/postgresql-8.3-postgis/spatial_ref_sys.sql

WP in Postgis[Bearbeiten | Quelltext bearbeiten]

CREATE TABLE wp_coords_red0wd4
(
  lang character varying(10),
  "Titel" character varying(255),
  lat double precision,
  lon double precision,
  types character varying(50),
  pop character varying(50),
  "Height" character varying(50),
  "Country" character varying(10),
  "Subregion" character varying(255),
  "Scale" character varying(10),
  dim character varying(30),
  psize double precision,
  style character varying(50),
  t character varying(10),
  image character varying(255),
  imagejpg character varying(255),
  name character varying(255),
  page_id double precision,
  "T_wikidata" character varying(255),
  "T_aa" character varying(255),
  "T_ab" character varying(255),
  "T_ace" character varying(255),
  "T_af" character varying(255),
  "T_ak" character varying(255),
  "T_als" character varying(255),
  "T_am" character varying(255),
  "T_an" character varying(255),
  "T_ang" character varying(255),
  "T_ar" character varying(255),
  "T_arc" character varying(255),
  "T_arz" character varying(255),
  "T_ast" character varying(255),
  "T_av" character varying(255),
  "T_ay" character varying(255),
  "T_az" character varying(255),
  "T_ba" character varying(255),
  "T_bar" character varying(255),
  "T_bat-smg" character varying(255),
  "T_bcl" character varying(255),
  "T_be" character varying(255),
  "T_be-x-old" character varying(255),
  "T_bg" character varying(255),
  "T_bh" character varying(255),
  "T_bi" character varying(255),
  "T_bm" character varying(255),
  "T_bn" character varying(255),
  "T_bo" character varying(255),
  "T_bpy" character varying(255),
  "T_br" character varying(255),
  "T_bs" character varying(255),
  "T_bug" character varying(255),
  "T_bxr" character varying(255),
  "T_ca" character varying(255),
  "T_cbk-zam" character varying(255),
  "T_cdo" character varying(255),
  "T_ce" character varying(255),
  "T_ceb" character varying(255),
  "T_ch" character varying(255),
  "T_cho" character varying(255),
  "T_chr" character varying(255),
  "T_chy" character varying(255),
  "T_ckb" character varying(255),
  "T_co" character varying(255),
  "T_cr" character varying(255),
  "T_crh" character varying(255),
  "T_cs" character varying(255),
  "T_csb" character varying(255),
  "T_cu" character varying(255),
  "T_cv" character varying(255),
  "T_cy" character varying(255),
  "T_da" character varying(255),
  "T_de" character varying(255),
  "T_diq" character varying(255),
  "T_dsb" character varying(255),
  "T_dv" character varying(255),
  "T_dz" character varying(255),
  "T_ee" character varying(255),
  "T_el" character varying(255),
  "T_eml" character varying(255),
  "T_en" character varying(255),
  "T_en-simple" character varying(255),
  "T_eo" character varying(255),
  "T_es" character varying(255),
  "T_et" character varying(255),
  "T_eu" character varying(255),
  "T_ext" character varying(255),
  "T_fa" character varying(255),
  "T_ff" character varying(255),
  "T_fi" character varying(255),
  "T_fiu-vro" character varying(255),
  "T_fj" character varying(255),
  "T_fo" character varying(255),
  "T_fr" character varying(255),
  "T_frp" character varying(255),
  "T_fur" character varying(255),
  "T_fy" character varying(255),
  "T_ga" character varying(255),
  "T_gan" character varying(255),
  "T_gd" character varying(255),
  "T_gl" character varying(255),
  "T_glk" character varying(255),
  "T_gn" character varying(255),
  "T_got" character varying(255),
  "T_gu" character varying(255),
  "T_gv" character varying(255),
  "T_ha" character varying(255),
  "T_hak" character varying(255),
  "T_haw" character varying(255),
  "T_he" character varying(255),
  "T_hi" character varying(255),
  "T_hif" character varying(255),
  "T_ho" character varying(255),
  "T_hr" character varying(255),
  "T_hsb" character varying(255),
  "T_ht" character varying(255),
  "T_hu" character varying(255),
  "T_hy" character varying(255),
  "T_hz" character varying(255),
  "T_ia" character varying(255),
  "T_id" character varying(255),
  "T_ie" character varying(255),
  "T_ig" character varying(255),
  "T_ii" character varying(255),
  "T_ik" character varying(255),
  "T_ilo" character varying(255),
  "T_io" character varying(255),
  "T_is" character varying(255),
  "T_it" character varying(255),
  "T_iu" character varying(255),
  "T_ja" character varying(255),
  "T_jbo" character varying(255),
  "T_jv" character varying(255),
  "T_ka" character varying(255),
  "T_kaa" character varying(255),
  "T_kab" character varying(255),
  "T_kg" character varying(255),
  "T_ki" character varying(255),
  "T_kj" character varying(255),
  "T_kk" character varying(255),
  "T_kl" character varying(255),
  "T_km" character varying(255),
  "T_kn" character varying(255),
  "T_ko" character varying(255),
  "T_kr" character varying(255),
  "T_ks" character varying(255),
  "T_ksh" character varying(255),
  "T_ku" character varying(255),
  "T_kv" character varying(255),
  "T_kw" character varying(255),
  "T_ky" character varying(255),
  "T_la" character varying(255),
  "T_lad" character varying(255),
  "T_lb" character varying(255),
  "T_lbe" character varying(255),
  "T_lg" character varying(255),
  "T_li" character varying(255),
  "T_lij" character varying(255),
  "T_lmo" character varying(255),
  "T_ln" character varying(255),
  "T_lo" character varying(255),
  "T_lt" character varying(255),
  "T_lv" character varying(255),
  "T_map-bms" character varying(255),
  "T_mdf" character varying(255),
  "T_mg" character varying(255),
  "T_mh" character varying(255),
  "T_mhr" character varying(255),
  "T_mi" character varying(255),
  "T_mk" character varying(255),
  "T_ml" character varying(255),
  "T_mn" character varying(255),
  "T_mo" character varying(255),
  "T_mr" character varying(255),
  "T_ms" character varying(255),
  "T_mt" character varying(255),
  "T_mus" character varying(255),
  "T_mwl" character varying(255),
  "T_my" character varying(255),
  "T_myv" character varying(255),
  "T_mzn" character varying(255),
  "T_na" character varying(255),
  "T_nah" character varying(255),
  "T_nap" character varying(255),
  "T_nds" character varying(255),
  "T_nds-nl" character varying(255),
  "T_ne" character varying(255),
  "T_new" character varying(255),
  "T_ng" character varying(255),
  "T_nl" character varying(255),
  "T_nn" character varying(255),
  "T_no" character varying(255),
  "T_nostalgia" character varying(255),
  "T_nov" character varying(255),
  "T_nrm" character varying(255),
  "T_nv" character varying(255),
  "T_ny" character varying(255),
  "T_oc" character varying(255),
  "T_om" character varying(255),
  "T_or" character varying(255),
  "T_os" character varying(255),
  "T_pa" character varying(255),
  "T_pag" character varying(255),
  "T_pam" character varying(255),
  "T_pap" character varying(255),
  "T_pcd" character varying(255),
  "T_pdc" character varying(255),
  "T_pi" character varying(255),
  "T_pih" character varying(255),
  "T_pl" character varying(255),
  "T_pms" character varying(255),
  "T_pnb" character varying(255),
  "T_pnt" character varying(255),
  "T_ps" character varying(255),
  "T_pt" character varying(255),
  "T_qu" character varying(255),
  "T_rm" character varying(255),
  "T_rmy" character varying(255),
  "T_rn" character varying(255),
  "T_ro" character varying(255),
  "T_roa-rup" character varying(255),
  "T_roa-tara" character varying(255),
  "T_ru" character varying(255),
  "T_rw" character varying(255),
  "T_sa" character varying(255),
  "T_sah" character varying(255),
  "T_sc" character varying(255),
  "T_scn" character varying(255),
  "T_sco" character varying(255),
  "T_sd" character varying(255),
  "T_se" character varying(255),
  "T_sg" character varying(255),
  "T_sh" character varying(255),
  "T_si" character varying(255),
  "T_simple" character varying(255),
  "T_sk" character varying(255),
  "T_sl" character varying(255),
  "T_sm" character varying(255),
  "T_sn" character varying(255),
  "T_so" character varying(255),
  "T_sq" character varying(255),
  "T_sr" character varying(255),
  "T_srn" character varying(255),
  "T_ss" character varying(255),
  "T_st" character varying(255),
  "T_stq" character varying(255),
  "T_su" character varying(255),
  "T_sv" character varying(255),
  "T_sw" character varying(255),
  "T_szl" character varying(255),
  "T_ta" character varying(255),
  "T_te" character varying(255),
  "T_tet" character varying(255),
  "T_tg" character varying(255),
  "T_th" character varying(255),
  "T_ti" character varying(255),
  "T_tk" character varying(255),
  "T_tl" character varying(255),
  "T_tlh" character varying(255),
  "T_tn" character varying(255),
  "T_to" character varying(255),
  "T_tokipona" character varying(255),
  "T_tpi" character varying(255),
  "T_tr" character varying(255),
  "T_ts" character varying(255),
  "T_tt" character varying(255),
  "T_tum" character varying(255),
  "T_tw" character varying(255),
  "T_ty" character varying(255),
  "T_udm" character varying(255),
  "T_ug" character varying(255),
  "T_uk" character varying(255),
  "T_ur" character varying(255),
  "T_uz" character varying(255),
  "T_ve" character varying(255),
  "T_vec" character varying(255),
  "T_vi" character varying(255),
  "T_vls" character varying(255),
  "T_vo" character varying(255),
  "T_wa" character varying(255),
  "T_war" character varying(255),
  "T_wo" character varying(255),
  "T_wuu" character varying(255),
  "T_xal" character varying(255),
  "T_xh" character varying(255),
  "T_yi" character varying(255),
  "T_yo" character varying(255),
  "T_za" character varying(255),
  "T_zea" character varying(255),
  "T_zh" character varying(255),
  "T_zh-classical" character varying(255),
  "T_zh-min-nan" character varying(255),
  "T_zh-yue" character varying(255),
  "T_zu" character varying(255),
  the_geom geometry,
  arms character varying(255),
  instance character varying(40),
  classes integer[],
  superclasses integer[]
)

\copy "pub_C_geo_id" FROM  '/home/kolossos/public_html/wp-world/einspielen-files2/dewiki-limit' WITH DELIMITER '\t'  NULL 'NULL'  CSV HEADER
 
SELECT AddGeometryColumn('public', 'pub_C_geo_id', 'the_geom', 4326, 'POINT', 2);
UPDATE "pub_C_geo_id" SET the_geom = ST_SetSRID(ST_Point(lon, lat),4326);
CREATE INDEX idx_pub_C_geo_id_the_geom ON "pub_C_geo_id" USING GIST (the_geom);

name:*[Bearbeiten | Quelltext bearbeiten]

Select count(*) from planet_polygon
where strpos(array_to_string(akeys(tags),','),'name:')>0  limit 15

Reduce Map[Bearbeiten | Quelltext bearbeiten]

CREATE TABLE wp_coords_red2 as
select * from wp_coords_red0 a where  
(SELECT Count(*) from wp_coords_red0 b where b.the_geom
&& ST_SetSRID(ST_MakeBox2D(ST_Point(a.lon-.5,a.lat-.5),
ST_Point(a.lon+.5,a.lat+.5)),4326)
and b.psize>a.psize)<80;

CREATE INDEX idx_the_geom2  ON wp_coords_red2  USING gist  (the_geom);
Vacuum Analyze wp_coords_red2;

CREATE TABLE wp_coords_red3 as
select * from wp_coords_red2 a where  
(SELECT Count(*) from wp_coords_red2 b where b.the_geom
&& ST_SetSRID(ST_MakeBox2D(ST_Point(a.lon-5,a.lat-5),
ST_Point(a.lon+5,a.lat+5)),4326)
and b.psize>a.psize)<80;

CREATE INDEX idx_the_geom3  ON wp_coords_red3  USING gist  (the_geom);
Vacuum Analyze wp_coords_red3;

CREATE TABLE wp_coords_red4 as
select * from wp_coords_red3 a where  
(SELECT Count(*) from wp_coords_red3 b where b.the_geom
&& ST_SetSRID(ST_MakeBox2D(ST_Point(a.lon-50,a.lat-50),
ST_Point(a.lon+50,a.lat+50)),4326)
and b.psize>a.psize)<80;

CREATE INDEX idx_the_geom4  ON wp_coords_red4  USING gist  (the_geom);
Vacuum Analyze wp_coords_red4;


GRANT SELECT ON TABLE "pub_C_geo_id" TO public;
GRANT SELECT ON TABLE wp_coords_red0 TO public;
GRANT SELECT ON TABLE wp_coords_red2 TO public;
GRANT SELECT ON TABLE wp_coords_red3 TO public;
GRANT SELECT ON TABLE wp_coords_red4 TO public;

SSH[Bearbeiten | Quelltext bearbeiten]

ssh -L 4711:osmdb.eqiad.wmnet:5432 kolossos@tools-login.wmflabs.org

Dann lokal localhost und port 4711 aufrufen. --Kolossos 19:37, 2. Dez. 2014 (CET)

Dumps[Bearbeiten | Quelltext bearbeiten]

\copy wp_coords_red0 TO '/mnt/user-store/kolossos/wp-world/new_red0'

\copy "pub_C_geo_id" TO '/mnt/user-store/kolossos/wp-world/new_C'