/*************************************************
**                                              **
**              Example data sets               **
**                                              **
*************************************************/

/* Name, sex, math score, Chinese score and average score
 * for the five students of one class.
 * name is read with column input from columns 1-10, so each name
 * below is padded with blanks so that sex starts after column 10. */
data samp.c9501;
  input name $ 1-10 sex $ math chinese;
  /* chinese is rescaled by 100/120 before both scores get weight 0.5 */
  avg = math*0.5 + chinese/120*100*0.5;
  cards;
李明       男 92 98
张红艺     女 89 106
王思明     男 86 90
张聪       男 98 109
刘颍       女 80 110
;
run;

/* Copy c9501 into WORK so that the steps below can use the one-level
 * name. The data set was written to SAMP just above, so SAMP (not
 * SASUSER) has to be the source library.
 * NOTE(review): assumes SAMP and SASUSER are different libraries - confirm. */
proc datasets;
  copy out=work in=samp;
    select c9501;
quit;

/* Split the class by sex; unexpected values are reported in the log. */
data c9501m c9501f;
  set c9501;
  select(sex);
    when('男') output c9501m;
    when('女') output c9501f;
    otherwise put sex= '有错';
  end;
  drop sex;
run;

/* Two column subsets of c9501: name and sex only ... */
data c9501x;
  set c9501;
  keep name sex;
run;

/* ... and name with the two scores. */
data c9501y;
  set c9501;
  keep name math chinese;
run;

/* An amount for each of the five student names. */
data bkmoney;
  input name $ amount;
  cards;
李明 20
张红艺 15
王思明 10
张聪 20
刘颍 50
;
run;

/* Data set for demonstrating clustering and the group markers/colours
 * in INSIGHT: interval variables X, Y, Z in three groups (G). */
data samp.clus;
  do i=1 to 50;
    g='a';
    x = normal(0);
    y = normal(0);
    z = normal(0);
    output;
  end;
  do i=1 to 50;
    g='b';
    x = 3 + normal(0);
    y = 4 + normal(0);
    z = -3 + normal(0);
    output;
  end;
  do i=1 to 50;
    g='c';
    x = 3 + normal(0);
    y = -4 + normal(0);
    z = -3 + normal(0);
    output;
  end;
  drop i;
run;

/* A data set with many rows: 10000 rows of x1-x10, all normal(0) draws. */
data huge;
  array x(10);
  do i=1 to 10000;
    do j=1 to 10;
      x(j) = normal(0);
    end;
    output;
  end;
  drop i j;
run;

/* Payroll example: employee number (IdNumber), Sex, job type code
 * (Jobcode), Salary, birthday (Birth), hiring date (Hired).
 * Can be used for SQL. */
data payroll;
  input IdNumber $ 1-4 Sex $ 6 Jobcode $ 8-10 Salary 12-16 @18 Birth date7.
@26 Hired date7.;
  format birth hired mmddyy8.;
  datalines;
1009 M TA1 28880 02MAR59 26MAR92
1017 M TA3 40858 28DEC57 16OCT81
1036 F TA3 39392 19MAY65 23OCT84
1037 F TA1 28558 10APR64 13SEP92
1038 F TA1 26533 09NOV69 23NOV91
1050 M ME2 35167 14JUL63 24AUG86
1065 M ME2 35090 26JAN44 07JAN87
1076 M PT1 66558 14OCT55 03OCT91
1094 M FA1 22268 02APR70 17APR91
1100 M BCK 25004 01DEC60 07MAY88
;
run;

/* OilProd: crude oil production. Country and barrels produced per day
 * (BarrelsPerDay). Country occupies columns 1-24; the figure is read
 * with comma9. starting at column 26. */
data OilProd;
  input Country $ 1-24 @26 BarrelsPerDay comma9.;
  format barrelsperday comma9.;
  datalines;
Algeria                  1,400,000
Canada                   2,500,000
China                    3,000,000
Egypt                    900,000
Indonesia                1,500,000
Iran                     4,000,000
Iraq                     600,000
Kuwait                   2,500,000
Libya                    1,500,000
Mexico                   3,400,000
Nigeria                  2,000,000
Norway                   3,500,000
Oman                     900,000
Saudi Arabia             9,000,000
United States of America 8,000,000
United Arab Emirates     2,000,000
United Kingdom           3,000,000
Venezuela                3,000,000
USSR (former)            7,000,000
;
run;

/* OilRsrvs: crude oil reserves. Country and reserves (Barrels).
 * Country occupies columns 1-24; the figure is read with comma15.
 * starting at column 26. */
data OilRsrvs;
  input Country $ 1-24 @26 Barrels comma15.;
  format barrels comma15.;
  datalines;
Algeria                  9,200,000,000
Canada                   7,000,000,000
China                    25,000,000,000
Egypt                    4,000,000,000
Gabon                    1,000,000,000
Indonesia                5,000,000,000
Iran                     90,000,000,000
Iraq                     110,000,000,000
Kuwait                   95,000,000,000
Libya                    30,000,000,000
Mexico                   50,000,000,000
Nigeria                  16,000,000,000
Norway                   11,000,000,000
Saudi Arabia             260,000,000,000
United Arab Emirates     100,000,000
United Kingdom           4,500,000,000
United States of America 30,000,000,000
Venezuela                65,000,000,000
USSR (Former)            65,500,000,000
;
run;

/* WorldCityCoords: coordinates of major world cities. Variables:
 * City (city name), Country (country it is in), Latitude, Longitude.
*/ /* Column input: City is read from columns 1-25, Country from columns 28-46, Latitude from columns 48-50 and Longitude from columns 53-56. NOTE(review): as shown here the data rows have single blanks between fields, which column input cannot split - confirm that the rows are column-aligned in the real file. */ data worldcitycoords; input City $ 1-25 Country $ 28-46 Latitude 48-50 Longitude 53-56; datalines; Kabul Afghanistan 35 69 Algiers Algeria 37 3 Buenos Aires Argentina -34 -59 Cordoba Argentina -31 -64 Tucuman Argentina -27 -65 Adelaide Australia -35 138 Alice Springs Australia -24 134 Brisbane Australia -27 153 Darwin Australia -12 131 Melbourne Australia -38 145 Perth Australia -32 116 Sydney Australia -34 151 Vienna Austria 48 16 Nassau Bahamas 26 -77 Chittagong Bangladesh 22 92 Brussels Belgium 51 4 Belize Belize 17 -88 Kindley AFB Bermuda 33 -65 La Paz Bolivia -16 -69 Belem Brazil -1 -48 Belo Horizonte Brazil -20 -44 Brasilia Brazil -16 -48 Curitiba Brazil -25 -49 Fortaleza Brazil -4 -38 Porto Alegre Brazil -30 -51 Recife Brazil -9 -35 Rio de Janeiro Brazil -23 -43 Salvador Brazil -13 -38 Sao Paulo Brazil -23 -46 Sofia Bulgaria 43 23 Phnom Penh Cambodia 11 105 Calgary Canada 51 -114 Havre Canada 48 -110 Kingston Canada 44 -76 London Canada 43 -81 Moose Jaw Canada 50 -105 Montreal Canada 45 -73 Ottawa Canada 45 -76 Port Arthur Canada 48 -89 Quebec Canada 47 -71 St. 
John Canada 45 -66 Toronto Canada 44 -79 Victoria Canada 48 -123 Winnipeg Canada 50 -98 Punta Arenas Chile -53 -71 Santiago Chile -33 -71 Valparaiso Chile -33 -71 Chongquing China 29 106 Shanghai China 31 121 Baranquilla Colombia 11 -75 Bogota Colombia 4 -75 Cali Colombia 3 -76 Medellin Colombia 6 -75 Brazzaville Congo -4 15 Guantanamo Bay Cuba 20 -76 Havana Cuba 24 -82 Prague Czech Republic 51 14 Copenhagen Denmark 56 12 Santo Domingo Dominican Republic 18 -70 Cairo Egypt 30 31 San Salvador El Salvador 14 -89 Guayaquil Ecuador -21 -80 Quito Ecuador 0 -78 Addis Ababa Ethiopia 9 39 Asmara Ethiopia 15 39 Helsinki Finland 60 25 Lyon France 46 5 Marseilles France 43 5 Nantes France 47 -1 Nice France 44 7 Paris France 49 2 Strasbourg France 48 8 Cayenne French Guiana 5 -52 Berlin Germany 52 13 Hamburg Germany 53 10 Hannover Germany 52 10 Mannheim Germany 49 8 Munich Germany 49 11 Accra Ghana 5 0 Gibraltar Gibraltar 37 -5 Athens Greece 38 24 Thessaloniki Greece 40 23 Guatemala City Guatemala 14 -90 Georgetown Guyana 7 -58 Port Au Prince Haiti 18 -72 Tegucigalpa Honduras 15 -87 Hong Kong Hong Kong 22 114 Budapest Hungary 47 19 Reykjavik Iceland 65 22 Ahmenabad India 22 72 Bangalore India 13 77 Bombay India 19 73 Calcutta India 22 88 Madras India 14 80 Nagpur India 22 80 New Delhi India 28 77 Djakarta Indonesia -6 107 Kupang Indonesia -10 123 Makassar Indonesia -6 119 Medan Indonesia 3 99 Palembang Indonesia -3 105 Surabaya Indonesia -7 113 Abadan Iran 30 48 Meshed Iran 36 59 Tehran Iran 36 51 Baghdad Iraq 33 44 Mosul Iraq 36 44 Dublin Ireland 53 -6 Shannon Ireland 53 -9 Jerusalem Israel 32 35 Tel Aviv Israel 33 35 Milan Italy 45 9 Naples Italy 41 14 Rome Italy 42 12 Fukuoka Japan 33 130 Sapporo Japan 44 141 Tokyo Japan 36 140 Amman Jordan 32 36 Nairobi Kenya -1 37 Pyongyang Korea, North 39 126 Seoul Korea, South 37 127 Beirut Lebanon 34 35 Monrovia Liberia 6 -11 Benghazi Libya 33 21 Tananarive Madagascar -19 47 Kuala Lumpur Malaysia 4 102 Penang Malaysia 5 100 Guadalajara 
Mexico 21 -103 Merida Mexico 21 -89 Mexico City Mexico 19 -99 Monterrey Mexico 26 -100 Vera Cruz Mexico 19 -97 Casablanca Morocco 33 -7 Katmandu Nepal 28 85 Amsterdam Netherlands 52 5 Auckland New Zealand -37 175 Christchurch New Zealand -43 172 Wellington New Zealand -41 175 Managua Nicaragua 12 -86 Lagos Nigeria 6 3 Bergen Norway 60 5 Oslo Norway 60 11 Karachi Pakistan 25 67 Lahore Pakistan 31 74 Peshwar Pakistan 34 71 Panama City Panama 9 -79 Port Moresby Papua New Guinea -9 148 Ascuncion Paraguay -25 -57 Lima Peru -13 -77 Manila Philippines 14 121 Krakow Poland 51 20 Warsaw Poland 52 21 Lisbon Portugal 39 -10 San Juan Puerto Rico 18 -67 Bucharest Romania 44 27 Kiev Russia 50 30 Leningrad Russia 60 30 Minsk Russia 54 27 Moscow Russia 56 38 Odessa Russia 46 31 Tashkent Russia 41 69 Tbilisi Russia 42 45 Vladivostok Russia 44 132 Volgograd Russia 49 44 Dhahran Saudi Arabia 26 51 Jedda Saudi Arabia 21 39 Riyadh Saudi Arabia 24 47 Dakar Senegal 15 -17 Singapore Singapore 1 104 Mogadiscio Somalia 2 49 Cape Town South Africa -34 18 Johannesburg South Africa -26 28 Pretoria South Africa -26 28 Aden Yemen 13 45 Barcelona Spain 41 3 Madrid Spain 40 -4 Valencia Spain 39 0 Colombo Sri Lanka 7 80 Khartoum Sudan 15 32 Paramaribo Suriname 6 -56 Stockholm Sweden 59 19 Zurich Switzerland 47 8 Damascus Syria 33 36 Tainan Taiwan 23 120 Taipei Taiwan 25 121 Dar es Salaam Tanzania -7 39 Bangkok Thailand 14 100 Port of Spain Trinidad and Tobago 11 -61 Tunis Tunisia 37 10 Adana Turkey 37 35 Ankara Turkey 40 33 Istanbul Turkey 41 29 Izmir Turkey 38 27 Belfast Northern Ireland 54 -6 Birmingham England 52 -2 Cardiff Wales 51 -3 Edinburgh Scotland 56 -3 Glasgow Scotland 56 -4 London England 51 0 Montevideo Uruguay -35 -56 Caracas Venezuela 10 -67 Maracaibo Venezuela 10 -71 Da Nang Vietnam 17 108 Hanoi Vietnam 21 106 Ho Chi Minh City (Saigon) Vietnam 11 107 Belgrade Yugoslavia 45 20 Acapulco Mexico 17 -100 Beijing China 40 116 San Jose Costa Rica 10 -85 Hamilton Bermuda 32 -65 Vancouver 
Canada 49 -124 Kingston Jamaica 18 -77 ; run; /* Countries data set: basic facts about each country. * Variables: Name (country name), Capital (capital city), Population, * Area, Continent, UNDate (year of joining the United Nations). * Column input: Name 1-35, Capital 37-55, Population 57-65, Area 67-75, * Continent 77-107, UNDate 109-112. */ data Countries; input Name $ 1-35 Capital $ 37-55 Population 57-65 Area 67-75 Continent $ 77-107 UNDate 109-112; datalines; Afghanistan Kabul 17070323 251825 Asia 1946 Albania Tirane 3407400 11100 Europe 1955 Algeria Algiers 28171132 919595 Africa 1962 Andorra Andorra la Vella 64634 200 Europe 1993 Angola Luanda 9901050 481300 Africa 1976 Antigua and Barbuda St. John's 65644 171 Central America and Caribbean 1981 Argentina Buenos Aires 34248705 1073518 South America 1945 Armenia Yerevan 3556864 11500 Asia 1992 Australia Canberra 18255944 2966200 Australia 1945 Austria Vienna 8033746 32400 Europe 1955 Azerbaijan Baku 7760064 33400 Asia 1992 Bahamas Nassau 275703 5400 Central America and Caribbean 1973 Bahrain Manama 591800 300 Asia 1971 Bangladesh Dhaka 1.2639E8 57300 Asia 1974 Barbados Bridgetown 258534 200 Central America and Caribbean 1966 Belarus Minsk 10508000 80100 Europe 1945 Belgium Brussels 10162614 11800 Europe 1945 Belize Belmopan 211069 8900 Central America and Caribbean 1981 Benin Porto Novo 5394881 43500 Africa 1960 Bermuda Hamilton 60594 100 . Bhutan Thimphu 1756214 18100 Asia 1971 Bolivia La Paz 7795410 424200 South America 1945 Bosnia and Herzegovina Sarajevo 4697040 19700 Europe 1992 Botswana Gaborone 1372453 224600 Africa 1966 Brazil Brasilia 1.6031E8 3286500 South America 1945 Brunei Bandar Seri Begawan 287822 2200 Asia 1984 Bulgaria Sofia 8887111 42900 Europe 1955 Burkina Faso Ouagodougou 10235326 105900 Africa 1960 Burundi Bujumbura 6185632 10700 Africa 1962 Cambodia Phnom Penh 10366614 70200 Asia 1955 Cameroon Yaounde 13261994 183600 Africa 1960 Canada Ottawa 28392302 3849674 North America 1945 Cape Verde Praia 427188 1600 Africa . Cayman Islands Georgetown 23228 100 Central America and Caribbean . 
Central African Republic Bangui 3173103 240300 Africa 1960 Chad N'Djamena 5521118 495800 Africa 1960 Channel Islands 146436 100 Europe . Chile Santiago 14089101 292100 South America 1945 China Beijing 1.2022E9 3696100 Asia 1945 Colombia Bogota 35930188 440800 South America 1945 Comoros Moroni 535246 700 Africa 1975 Congo Brazzaville 2471223 132000 Africa 1960 Congo, Democratic Republic of Kinshasa 43106529 905400 Africa 1960 Costa Rica San Jose 3375083 19700 Central America and Caribbean 1945 Cote D'Ivoire Yamoussoukro 14437516 124500 Africa 1960 Croatia Zagreb 4744505 21800 Europe 1992 Cuba Havana 11173523 42800 Central America and Caribbean 1945 Cyprus Nicosia 737226 3600 Asia 1960 Czech Republic Prague 10511029 30400 Europe 1993 Denmark Copenhagen 5239356 16600 Europe 1945 Djibouti Djibouti 417089 8900 Africa 1977 Dominica Roseau 88871 300 Central America and Caribbean 1978 Dominican Republic Santo Domingo 7903469 18700 Central America and Caribbean 1945 Ecuador Quito 10782691 105000 South America 1945 Egypt Cairo 59912259 385200 Africa 1945 El Salvador San Salvador 5809949 8100 Central America and Caribbean 1945 England London 49293170 50400 Europe 1945 Equatorial Guinea Malabo 414059 10800 Africa 1968 Eritrea Asmera 3231677 45300 Africa 1993 Estonia Tallinn 1633006 17400 Europe 1991 Ethiopia Addis Ababa 59291170 437800 Africa 1945 Fiji Suva 771563 7100 Oceania 1970 Finland Helsinki 5119178 130600 Europe 1955 France Paris 58412558 210000 Europe 1945 French Guiana Cayenne 102000 43700 South America . Gabon Libreville 1150275 103300 Africa 1960 Gambia (The) Banjul 968493 4100 Africa 1965 Georgia, Republic of Tbilisi 5737236 26900 Asia 1992 Germany Berlin 81890690 137700 Europe 1973 Ghana Accra 17395511 92100 Africa 1957 Gibraltar Gibraltar 30297 100 Europe . Greece Athens 10669583 51000 Europe 1945 Grenada St. 
George's 94931 100 Central America and Caribbean 1974 Guatemala Guatemala City 10827127 42000 Central America and Caribbean 1945 Guinea Conakry 6455275 94900 Africa 1958 Guinea-Bissau Bissau 1108869 13900 Africa 1974 Guyana Georgetown 736216 83000 South America 1966 Haiti Port-au-Prince 6555255 10700 Central America and Caribbean 1945 Honduras Tegucigalpa 5367613 43300 Central America and Caribbean 1945 Hong Kong Victoria 5857414 400 Asia . Hungary Budapest 10421148 35900 Europe 1955 Iceland Reykjavik 266614 36700 1946 India New Delhi 9.2901E8 1222600 Asia 1945 Indonesia Jakarta 2.0239E8 741100 Asia 1950 Iran Tehran 66261493 632500 Asia 1945 Iraq Baghdad 20086891 168000 Asia 1945 Ireland Dublin 3574032 27100 Europe 1955 Isle of Man Douglas 70693 200 Europe . Israel Jerusalem 5101000 8000 Asia 1949 Italy Rome 58713508 116300 Europe 1955 Jamaica Kingston 2580291 4200 Central America and Caribbean 1962 Japan Tokyo 1.2635E8 145900 Asia 1956 Jordan Amman 4000210 34300 Asia 1955 Kalaallit Nunaat Nuuk 57564 840000 . Kazakhstan Almaty 17438936 1049200 Asia 1992 Kenya Nairobi 28520558 225000 Africa 1963 Kiribati Tarawa 78772 300 Oceania . Korea, North Pyongyang 23295340 47400 Asia 1991 Korea, South Seoul 45529277 38300 Asia 1991 Kuwait Kuwait City 1837006 6900 Asia 1963 Kyrgyzstan Bishkek 4744505 76600 Asia 1992 Laos Vientiane 4748545 91400 Asia 1955 Latvia Riga 2776212 24900 Europe 1991 Lebanon Beirut 3655834 3900 Asia 1945 Leeward Islands Plymouth 12119 100 Central America and Caribbean . 
Lesotho Maseru 1963244 11700 Africa 1966 Liberia Monrovia 3002430 38200 Africa 1945 Libya Tripoli 5107059 679400 Africa 1955 Liechtenstein Vaduz 30297 100 Europe 1990 Lithuania Vilnius 3886091 25200 Europe 1991 Luxembourg Luxembourg 405980 100 Europe 1945 Macedonia Skopje 2235917 9900 Europe 1993 Madagascar Antananarivo 13560924 226700 Africa 1960 Malawi Lilongwe 9828337 45700 Africa 1964 Malaysia Kuala Lumpur 19473883 127600 Asia 1957 Maldives Male 254495 100 Asia 1965 Mali Bamako 9203210 482100 Africa 1960 Malta Valletta 370633 100 Europe 1964 Marshall Islands Majuro 54535 100 Oceania 1991 Mauritania Nouakchott 2214709 398000 Africa 1961 Mauritius Port Louis 1128057 1000 Africa 1968 Mexico Mexico City 93114708 756100 North America 1945 Micronesia Palikir 121188 300 Oceania 1991 Moldova Chisinau 4517279 13000 Europe 1992 Monaco Monaco 31307 100 Europe 1993 Mongolia Ulaan Baatar 2454055 604800 Asia 1961 Montenegro Titograd 626137 5300 Europe . Morocco Rabat 28841705 177100 Africa 1956 Mozambique Maputo 17517708 313700 Africa 1975 Myanmar Yangon 44715298 261200 Asia 1948 Namibia Windhoek 1611798 318100 Africa 1990 Nauru Yaren 10099 100 Oceania . Nepal Kathmandu 21250295 56800 Asia 1955 Netherlands Amsterdam 15538306 16000 Europe 1945 Netherlands Antilles Willemstad 185822 400 Central America and Caribbean . New Zealand Wellington 3422548 104500 Oceania 1945 Nicaragua Managua 4137556 50900 Central America and Caribbean 1945 Niger Niamey 8720477 497000 Africa 1960 Nigeria Abuja 99062003 356700 Africa 1960 Northern Ireland Belfast 1585541 5500 Europe . 
Norway Oslo 4357714 125100 Europe 1945 Oman Muscat 1717838 118200 Asia 1971 Pakistan Islamabad 1.2306E8 339700 Asia 1947 Panama Panama City 2656034 29200 Central America and Caribbean 1945 Papua New Guinea Port Moresby 4238546 178700 Asia 1975 Paraguay Asuncion 5265614 157000 South America 1945 Peru Lima 23885121 496200 South America 1945 Philippines Manila 70500039 115900 Asia 1945 Poland Warsaw 39037645 120700 Europe 1945 Portugal Lisbon 10628177 35700 Europe 1955 Puerto Rico San Juan 3556864 3492 Central America and Caribbean . Qatar Doha 518078 4400 Asia 1971 Romania Bucharest 23410469 91700 Europe 1955 Russia Moscow 1.5109E8 6592800 Europe 1945 Rwanda Kigali 8456895 10200 Africa 1962 Saint Kitts and Nevis Basseterre 41406 100 Central America and Caribbean 1983 Saint Lucia Castries 146436 200 Central America and Caribbean 1979 Saint Vincent and the Grenadines Kingstown 116138 200 Central America and Caribbean 1980 San Marino San Marino 24238 100 Europe 1992 Sao Tome and Principe Sao Tome 138356 400 Africa 1975 Saudi Arabia Riyadh 18377132 865000 Asia 1945 Scotland Edinburgh 5006069 30400 Europe . Senegal Dakar 8817428 76000 Africa 1960 Serbia Belgrade 9755624 34100 Europe . Seychelles Victoria 72713 200 Africa 1976 Sierra Leone Freetown 4675832 27200 Africa 1961 Singapore Singapore 2887301 200 Asia 1965 Slovakia Bratislava 5457495 18900 Europe 1993 Slovenia Ljubljana 1991521 7800 Europe 1992 Solomon Islands Honiara 389821 11000 Oceania 1978 Somalia Mogadishu 6732996 246300 Africa 1960 South Africa Cape Town 44365873 473300 Africa 1945 Spain Madrid 39692061 194900 Europe 1955 Sri Lanka Colombo 18211509 25300 Asia 1955 Sudan Khartoum 29711229 966800 Africa 1956 Suriname Paramaribo 427188 63300 South America 1975 Swaziland Mbabane 945265 6700 Africa 1968 Sweden Stockholm 8864893 173700 Europe 1946 Switzerland Bern 7109689 15900 Europe . Syria Damascus 15034366 71500 Asia 1945 Taiwan Taipei 21509839 14000 Asia . 
Tajikistan Dushanbe 6054344 55300 Asia 1992 Tanzania Dar-es-Salaam 28263033 36400 Africa 1961 Thailand Bangkok 60099089 198100 Asia 1946 Togo Lome 4297120 21900 Africa 1960 Tonga Nuku'alofa 106040 300 Oceania . Trinidad and Tobago Port of Spain 1341146 2000 Central America and Caribbean 1962 Tunisia Tunis 8813388 63400 Africa 1956 Turkey Ankara 62769263 300948 Europe 1945 Turkmenistan Ashgabat 4034546 188400 Asia 1992 Turks and Caicos Islands Grand Turk 12119 200 Central America and Caribbean . Tuvalu Funafuti 10099 100 Oceania . Uganda Kampala 20055584 93100 Africa 1962 Ukraine Kiev 52360233 233100 Europe 1945 United Arab Emirates Abu Dhabi 2818628 30000 Asia 1971 United States Washington 2.6329E8 3787318 North America 1945 Uruguay Montevideo 3230667 68000 South America 1945 Uzbekistan Tashkent 22832806 172700 Asia 1992 Vanuatu Vila 171683 4700 Oceania 1981 Vatican City Vatican City 1010 2 Europe . Venezuela Caracas 20765543 352100 South America 1945 Vietnam Hanoi 73827657 127200 Asia 1977 Wales Cardiff 2825697 8000 Europe . Western Samoa Apia 206020 1100 Oceania 1976 Yemen Sanaa 11214929 205300 Asia 1947 Yugoslavia Belgrade 10866513 39400 Europe 1945 Zambia Lusaka 9278952 290600 Africa 1964 Zimbabwe Harare 11083641 150900 Africa 1980 ; run; *'; /* EXPREV: expense and revenue data for several regions. * Variables: Region, State, Month (year and month, read with monyy5.), Expenses, Revenues. */ data exprev; input Region $ State $ Month monyy5. 
Expenses Revenues;
  format month monyy5.;
  datalines;
Southern GA JAN95 2000 8000
Southern GA FEB95 1200 6000
Southern FL FEB95 8500 11000
Northern NY FEB95 3000 4000
Northern NY MAR95 6000 5000
Southern FL MAR95 9800 13500
Northern MA MAR95 1500 1000
;
run;

/* Pilots: pilot information.
 * Variables: ID (identity code), LastName, FirstName, City, State,
 * Gender, JobCode (job type code), Salary.
 * Column input: ID 1-4, LastName 6-15, FirstName 17-25, City 27-38,
 * State 40-41, Gender 43, JobCode 45-47; Salary is read from column 49. */
data pilots;
  infile datalines;
  input ID $ 1-4 LastName $ 6-15 FirstName $ 17-25
        City $ 27-38 State $ 40-41
        Gender $ 43 JobCode $ 45-47 @49 Salary @56;
  datalines;
1333 BLAIR      JUSTIN    STAMFORD     CT M PT2 88606
1739 BOYCE      JONATHAN  NEW YORK     NY M PT1 66517
1428 BRADY      CHRISTINE STAMFORD     CT F PT1 68767
1404 CARTER     DONALD    NEW YORK     NY M PT2 91376
1118 DENNIS     ROGER     NEW YORK     NY M PT3 111379
1905 GRAHAM     ALVIN     NEW YORK     NY M PT1 65111
1407 GRANT      DANIEL    MT. VERNON   NY M PT1 68096
1410 HARRIS     CHARLES   STAMFORD     CT M PT2 84685
1439 HARRISON   FELICIA   BRIDGEPORT   CT F PT1 70736
1545 HUNTER     CLYDE     STAMFORD     CT M PT1 66130
1777 LUFKIN     ROY       NEW YORK     NY M PT3 109630
1106 MARSHBURN  JASPER    STAMFORD     CT M PT2 89632
1333 NEWKIRK    SANDRA    PRINCETON    NJ F PT2 84536
1478 NEWTON     JAMES     NEW YORK     NY M PT2 84203
1556 PENNINGTON MICHAEL   NEW YORK     NY M PT1 71349
1890 STEPHENSON ROBERT    NEW YORK     NY M PT2 85896
1107 THOMPSON   WAYNE     NEW YORK     NY M PT2 89977
1830 TRIPP      KATHY     BRIDGEPORT   CT F PT2 84471
1928 UPCHURCH   LARRY     WHITE PLAINS NY M PT2 89858
1076 VENTER     RANDALL   NEW YORK     NY M PT1 66558
;
run;

/* Wghtclub: weight-loss data.
 * Variables: idno (number), name, team (group), strtwght (starting
 * weight), endwght (weight afterwards), loss (weight lost, computed
 * as strtwght-endwght).
 * idno is read from columns 1-4 and name from columns 6-24; the
 * remaining fields use list input. */
data wghtclub;
  input idno 1-4 name $ 6-24 team $ strtwght endwght;
  loss=strtwght-endwght;
  datalines;
1023 David Shaw          red 189 165
1049 Amelia Serrano      yellow 145 124
1219 Alan Nance          red 210 192
1246 Ravi Sinha          yellow 194 177
1078 Ashley McKnight     red 127 118
;
run;

/* Patients: basic patient information.
 * Variables: ID (number), Name, Sex, Age, Date (admission date),
 * Height, Weight, ActLevel (activity level), Fee. */
data patients;
  input ID $ 1-4 Name $ 6-16 Sex $ 18 Age 20-21 Date 23-24 Height 26-27
Weight 29-31 ActLevel $ 33-36 Fee 38-43;
  format fee 6.2;
  datalines;
2458 Murray, W   M 27  1 72 168 HIGH  85.20
2462 Almers, C   F 34  3 66 152 HIGH 124.80
2523 Johnson, R  F 43 31 63 137 MOD  149.75
2539 LaMance, K  M 51  4 71 158 LOW  124.80
2544 Jones, M    M 29  6 76 193 HIGH 124.80
2552 Reberson, P F 32  9 67 151 MOD  149.75
2555 King, E     M 35 13 70 173 MOD  149.75
2563 Pitts, D    M 34 22 73 154 LOW  124.80
2571 Nunnelly, A F 44 19 66 140 HIGH 149.75
2572 Oberon, M   F 28 17 62 118 LOW   85.20
2574 Peterson, V M 30  6 69 147 MOD  149.75
2575 Quigley, M  F 40  8 69 163 HIGH 124.80
2578 Cameron, L  M 47  5 72 173 MOD  124.80
2586 Derber, B   M 25 23 75 188 HIGH  85.20
2588 Ivan, H     F 22 20 63 139 LOW   85.20
2589 Wilcox, E   F 41 16 67 141 HIGH 149.75
2595 Warren, C   M 54  7 71 183 MOD  149.75
;
run;

/* Matrix used to demonstrate transposition. */
data mat;
  input x1 x2 x3;
  cards;
1 2 3
4 5 6
7 8 9
10 11 12
;
run;

/* Matrix used to demonstrate merging rows: every patient (num) has two
 * rows, one with test 'a' and one with test 'b'; val holds the drug
 * effect value. */
data onecol;
  input num test$ val;
  cards;
1 a 11
2 a 12
3 a 13
1 b 21
2 b 22
3 b 23
;
run;

/* Data set used to demonstrate splitting rows. num is the patient
 * number; test1 and test2 are the effects of drug A and drug B. */
data twocol;
  input num test1 test2;
  cards;
1 11 21
2 12 22
3 13 23
;
run;

/* Eye colour and hair colour data; the trailing @@ lets one data line
 * carry several observations. */
data color;
  input Region Eyes $ Hair $ Count @@;
  label eyes='Eye Color'
        hair='Hair Color'
        region='Geographic Region';
  datalines;
1 blue fair 23    1 blue red 7      1 blue medium 24
1 blue dark 11    1 green fair 19   1 green red 7
1 green medium 18 1 green dark 14   1 brown fair 34
1 brown red 5     1 brown medium 41 1 brown dark 40
1 brown black 3   2 blue fair 46    2 blue red 21
2 blue medium 44  2 blue dark 40    2 blue black 6
2 green fair 50   2 green red 31    2 green medium 37
2 green dark 23   2 brown fair 56   2 brown red 42
2 brown medium 53 2 brown dark 54   2 brown black 13
;
run;

/* Taxi receipt tally. amount is the fare; num is the number of
 * receipts with that fare. */
data samp.taxif;
  input amount num @@;
  cards;
10 4 12 6 13 1 15 1 16 1
19 5 20 3 23 1 24 1 25 1
26 3 27 1 32 1 47 1 48 2
49 1 52 1 55 1 58 1 81 1
;
run;

/* Bivariate normal density evaluated on a grid of x and y. */
data dnorm2;
  a=2;
  a2=sqrt(a);
  r=0.5;
  det=a*(1-r*r);
  do x=-3 to 3 by 0.3;
    do
y=-3*a2 to 3*a2 by 0.3*a2;
      /* The quadratic form (a*x*x + y*y - 2*r*a2*x*y)/det is that of the
       * covariance matrix [1, r*a2; r*a2, a], whose determinant is det,
       * so the normalising constant is 1/(2*pi*sqrt(det)). */
      z=1/(2*3.1415926*sqrt(det))*exp(-0.5/det*
        (a*x*x + y*y - 2*r*a2*x*y));
      output;
    end;
  end;
  keep x y z;
run;

/* Yearly stock data.
 * Variables: Year, DateOfHigh (date of the yearly high), DowJonesHigh
 * (Dow Jones high), DateOfLow (date of the yearly low), DowJonesLow
 * (Dow Jones low). LogDowHigh and LogDowLow are the natural logarithms.
 * The @7 and @26 pointers require the two dates to start in columns 7
 * and 26 of every data line. */
data stocks;
  input Year @7 DateOfHigh:date9. DowJonesHigh
        @26 DateOfLow:date9. DowJonesLow;
  format LogDowHigh LogDowLow 5.2 DateOfHigh DateOfLow date9.;
  LogDowHigh=log(DowJonesHigh);
  LogDowLow=log(DowJonesLow);
  datalines;
1954  31DEC1954   404.39 11JAN1954  279.87
1955  30DEC1955   488.40 17JAN1955  388.20
1956  06APR1956   521.05 23JAN1956  462.35
1957  12JUL1957   520.77 22OCT1957  419.79
1958  31DEC1958   583.65 25FEB1958  436.89
1959  31DEC1959   679.36 09FEB1959  574.46
1960  05JAN1960   685.47 25OCT1960  568.05
1961  13DEC1961   734.91 03JAN1961  610.25
1962  03JAN1962   726.01 26JUN1962  535.76
1963  18DEC1963   767.21 02JAN1963  646.79
1964  18NOV1964   891.71 02JAN1964  768.08
1965  31DEC1965   969.26 28JUN1965  840.59
1966  09FEB1966   995.15 07OCT1966  744.32
1967  25SEP1967   943.08 03JAN1967  786.41
1968  03DEC1968   985.21 21MAR1968  825.13
1969  14MAY1969   968.85 17DEC1969  769.93
1970  29DEC1970   842.00 06MAY1970  631.16
1971  28APR1971   950.82 23NOV1971  797.97
1972  11DEC1972  1036.27 26JAN1972  889.15
1973  11JAN1973  1051.70 05DEC1973  788.31
1974  13MAR1974   891.66 06DEC1974  577.60
1975  15JUL1975   881.81 02JAN1975  632.04
1976  21SEP1976  1014.79 02JAN1976  858.71
1977  03JAN1977   999.75 02NOV1977  800.85
1978  08SEP1978   907.74 28FEB1978  742.12
1979  05OCT1979   897.61 07NOV1979  796.67
1980  20NOV1980  1000.17 21APR1980  759.13
1981  27APR1981  1024.05 25SEP1981  824.01
1982  27DEC1982  1070.55 12AUG1982  776.92
1983  29NOV1983  1287.20 03JAN1983 1027.04
1984  06JAN1984  1286.64 24JUL1984 1086.57
1985  16DEC1985  1553.10 04JAN1985 1184.96
1986  02DEC1986  1955.57 22JAN1986 1502.29
1987  25AUG1987  2722.42 19OCT1987 1738.74
1988  21OCT1988  2183.50 20JAN1988 1879.14
1989  09OCT1989  2791.41 03JAN1989 2144.64
1990  16JUL1990  2999.75 11OCT1990 2365.10
1991  31DEC1991  3168.83 09JAN1991 2470.30
1992  01JUN1992  3413.21 09OCT1992 3136.58
1993  29DEC1993  3794.33 20JAN1993 3241.95
1994  31JAN1994  3978.36 04APR1994 3593.35
1995  13DEC1995  5216.47 30JAN1995 3832.08
1996  27DEC1996  6560.91 10JAN1996 5032.94
1997  06AUG1997  8259.31 11APR1997 6391.69
1998  23NOV1998  9374.27 31AUG1998 7539.07
;
run;

/* Fitness data: age, sex, heart rate, exercise, aerobic activity (Aerobic).
 * A lone period is a missing value. */
data samp.fitness2;
  input Age Sex $ HeartRate Exercise Aerobic;
  datalines;
28 M 86 2 36.6
41 M 76 3 26.7
30 M 78 2 33.8
39 F 90 1 13.6
28 M 96 1 33.
26 M 74 2 42.7
. F 66 4 36.1
48 F 72 2 22.6
31 M 60 3 44.1
28 F 84 2 22.1
33 F 56 4 21.3
37 F 78 2 30.3
46 M 84 1 34.2
23 M 72 2 38.1
25 F 88 1 32.0
37 F 72 2 43.7
42 M 60 3 36.7
44 F 78 3 21.6
. F 70 1 22.8
25 F 60 3 36.1
24 F 74 2 29.9
29 F 66 4 38.9
27 M 62 4 44.0
24 M 72 3 44.2
36 F 80 1 26.2
24 M 82 2 18.7
23 M 54 3 70.6
28 F 76 1 23.8
30 F 66 2 28.9
25 M 54 3 41.3
48 F 72 2 28.9
23 F 68 1 18.9
22 F 78 2 39.0
23 F 66 3 36.1
46 F 54 3 28.9
31 F 84 1 21.6
45 M 60 2 47.8
27 M 90 2 43.1
26 M 66 2 28.9
26 F 84 2 .
24 M 72 3 50.1
32 F 72 1 15.7
29 M 54 3 44.8
48 F 66 2 28.9
36 F 66 2 33.2
;
run;

/* Average salary by kind of engineer.
 * eng: kind of engineer. dollars: salary. num: head count. */
data jobs;
  length eng $5;
  input eng dollars num;
  datalines;
Civil 27308 73273
Aero 29844 70192
Elec 22920 89382
Mech 32816 19601
Chem 28116 25541
Petro 18444 34833
;
run;

/* Height and weight data. */
data samp.stats;
  input height weight;
  datalines;
69.0 112.5
56.5 84.0
65.3 98.0
62.8 102.5
63.5 102.5
57.3 83.0
59.8 84.5
62.5 112.5
62.5 84.0
59.0 99.5
51.3 50.5
64.3 90.0
56.3 77.0
66.5 112.0
72.0 150.0
64.8 128.0
67.0 133.0
57.5 85.0
;
run;

/* Average temperature by city. month: month; faren: temperature;
 * city: city. */
data citytemp;
  input month faren city $;
  datalines;
1 40.5 Raleigh
1 12.2 Minn
1 52.1 Phoenix
2 42.2 Raleigh
2 16.5 Minn
2 55.1 Phoenix
3 49.2 Raleigh
3 28.3 Minn
3 59.7 Phoenix
4 59.5 Raleigh
4 45.1 Minn
4 67.7 Phoenix
5 67.4 Raleigh
5 57.1 Minn
5 76.3 Phoenix
6 74.4 Raleigh
6 66.9 Minn
6 84.6 Phoenix
7 77.5 Raleigh
7 71.9 Minn
7 91.2 Phoenix
8 76.5 Raleigh
8 70.2 Minn
8 89.1 Phoenix
9 70.6 Raleigh
9 60.0 Minn
9 83.8 Phoenix
10 60.2 Raleigh
10 50.0 Minn
10 72.2 Phoenix
11 50.0 Raleigh
11 32.4 Minn
11 59.8 Phoenix
12 41.2 Raleigh
12 18.6 Minn
12 52.5 Phoenix
;
run;

/* Sales data. dept: department category. site: city. quarter: quarter.
 * sales: sales amount. */
data totals;
  length dept $ 7 site $ 8;
  input dept site quarter sales;
  datalines;
Parts Sydney 1 4043.97
Parts Atlanta 1 6225.26
Parts Paris 1 3543.97
Repairs Sydney 1 5592.82
Repairs Atlanta 1 9210.21
Repairs Paris 1 8591.98
Tools Sydney 1 1775.74
Tools Atlanta 1 2424.19
Tools Paris 1 5914.25
Parts Sydney 2 3723.44
Parts Atlanta 2 11595.07
Parts Paris 2 9558.29
Repairs Sydney 2 5505.31
Repairs Atlanta 2 4589.59
Repairs Paris 2 7538.56
Tools Sydney 2 2945.17
Tools Atlanta 2 1903.99
Tools Paris 2 7868.34
Parts Sydney 3 8437.96
Parts Atlanta 3 6847.91
Parts Paris 3 6789.85
Repairs Sydney 3 4426.46
Repairs Atlanta 3 5011.66
Repairs Paris 3 6510.38
Tools Sydney 3 3767.10
Tools Atlanta 3 3048.52
Tools Paris 3 9017.96
Parts Sydney 4 6065.57
Parts Atlanta 4 9388.51
Parts Paris 4 8509.08
Repairs Sydney 4 3012.99
Repairs Atlanta 4 2088.30
Repairs Paris 4 5530.37
Tools Sydney 4 3817.36
Tools Atlanta 4 4354.18
Tools Paris 4 6511.70
;
run;

/* Energy production data. year: year. engytype: energy type.
 * produced: amount produced. */
data enprod;
  input @1 year 4. @6 engytype $8.
@16 produced 5.2; datalines; 1985 Coal 19.33 1985 Gas 19.22 1985 Petro 18.99 1985 Nuclear 4.15 1985 Hydro 2.97 1985 Geotherm .20 1985 Biofuels .01 1995 Coal 21.98 1995 Gas 21.54 1995 Petro 13.89 1995 Nuclear 7.18 1995 Hydro 3.21 1995 Geotherm .31 1995 Biofuels 2.95 ; run; /* 拒收配件数据。site: 城市。date: 日期。badparts: 拒收的配件数。 */ data rejects; informat date date9.; input site $ date badparts; datalines; Sydney 01JAN1997 22 Sydney 01FEB1997 26 Sydney 01MAR1997 14 Sydney 01APR1997 18 Sydney 01MAY1997 28 Sydney 01JUN1997 22 Sydney 01JUL1997 15 Sydney 01AUG1997 18 Sydney 01SEP1997 19 Sydney 01OCT1997 15 Sydney 01NOV1997 31 Sydney 01DEC1997 23 Atlanta 01JAN1997 18 Atlanta 01FEB1997 22 Atlanta 01MAR1997 20 Atlanta 01APR1997 23 Atlanta 01MAY1997 10 Atlanta 01JUN1997 21 Atlanta 01JUL1997 29 Atlanta 01AUG1997 20 Atlanta 01SEP1997 17 Atlanta 01OCT1997 14 Atlanta 01NOV1997 16 Atlanta 01DEC1997 18 Paris 01JAN1997 13 Paris 01FEB1997 18 Paris 01MAR1997 27 Paris 01APR1997 29 Paris 01MAY1997 26 Paris 01JUN1997 20 Paris 01JUL1997 28 Paris 01AUG1997 21 Paris 01SEP1997 12 Paris 01OCT1997 8 Paris 01NOV1997 12 Paris 01DEC1997 19 ; run; /* 订制蛋糕数据。 * LastName: 姓。Age: 年龄。PresentScore: 积分。 * TasteScore: 口味积分。Flavor: 风味。Layers: 层数。 */ data cake; input LastName $ 1-12 Age 13-14 PresentScore 16-17 TasteScore 19-20 Flavor $ 23-32 Layers 34 ; datalines; Orlando 27 93 80 Vanilla 1 Ramey 32 84 72 Rum 2 Goldston 46 68 75 Vanilla 1 Roe 38 79 73 Vanilla 2 Larsen 23 77 84 Chocolate . Davis 51 86 91 Spice 3 Strickland 19 82 79 Chocolate 1 Nguyen 57 77 84 Vanilla . 
Hildenbrand 33 81 83 Chocolate 1 Byron 62 72 87 Vanilla 2 Sanders 26 56 79 Chocolate 1 Jaeger 43 66 74 1 Davis 28 69 75 Chocolate 2 Conrad 69 85 94 Vanilla 1 Walters 55 67 72 Chocolate 2 Rossburger 28 78 81 Spice 2 Matthew 42 81 92 Chocolate 2 Becker 36 62 83 Spice 2 Anderson 27 87 85 Chocolate 1 Merritt 62 73 84 Chocolate 1 ; run; /* 学生分数例子。包括名字、性别、状态(status)、年、分区(section)、 * 分数(score)、最终分数(FinalGrade)。 */ data grade; input Name $ 1-8 Gender $ 11 Status $13 Year $ 15-16 Section $ 18 Score 20-21 FinalGrade 23-24; datalines; Abbott F 2 97 A 90 87 Branford M 1 98 A 92 97 Crandell M 2 98 B 81 71 Dennison M 1 97 A 85 72 Edgar F 1 98 B 89 80 Faust M 1 97 B 78 73 Greeley F 2 97 A 82 91 Hart F 1 98 B 84 80 Isley M 2 97 A 88 86 Jasper M 1 97 B 91 93 ; run; **************************************************; **************************************************; **************************************************; **************************************************; **************************************************; /************************************************* ** ** ** 第一章 ** ** ** *************************************************/ title '95级1班学生成绩排名'; data c9501; input name $ 1-10 sex $ math chinese; avg = math*0.5 + chinese/120*100*0.5; cards; 李明 男 92 98 张红艺 女 89 106 王思明 男 86 90 张聪 男 98 109 刘颍 女 80 110 ; run; proc print;run; proc sort data=c9501; by descending avg; run; proc print;run; /* 用LIBNAME指定库名(LIBREF) */ libname mylib "c:\y1995"; /******* 求偏度、峰度标准误差的宏 ********/ %macro seskewkurt(data, variable); proc means data = &data n skew kurtosis; var &variable; output out=outmeans n=n skew=skew kurtosis=kurtosis; proc print data = outmeans; data _null_; set outmeans; call symput('getn', n); call symput('getkurtosis', kurtosis); call symput('getskew', skew); run; %let seskew=%sysevalf((((6*&getn)*(&getn-1))/((&getn-2)*(&getn+1)*(&getn+3)))**0.5); %let sekurt=%sysevalf(2*&seskew*((&getn**2*(2-1))/((&getn-3)*(&getn+5)))**0.5) ; %let zkurt = %sysevalf(&getkurtosis/&sekurt); 
%let zskew = %sysevalf(&getskew/&seskew); %put N is &getn ; %put Skew is &getskew; %put SE of skew is &seskew ; %put Z score of skew is &zskew ; %put Kurtosis is &getkurtosis ; %put SE of kurtosis is &sekurt ; %put Z score of Kurtosis is &zkurt ; %mend; %seskewkurt(dataset, variable); /************************************************* ** ** ** 第二章 ** ** ** *************************************************/ **************************************************; **************************************************; **************************************************; **************************************************; **************************************************; **************************************************; /***************************/ /* §2.2 SAS用作一般高级语言 */ /***************************/ /* PUT语句 */ data; x=0.5; y=sin(x); put "sin(" x ")=" y; run; /* 用PUT语句做表 */ data _null_; set samp.class; *put name sex age height weight; IF _n_=1 THEN DO; put 'Name Sex Age Height Weight'; put '-----------------------------'; END; put name $10. sex $2. age 3. height 6.1 weight 7.1; run; data _null_; set samp.class; put '姓名: ' name / '性别: ' sex / '年龄: ' age / '身高: ' height 6.1 / '体重: ' weight 6.1 /; run; /* 用FILE和PUT语句生成平方数表 */ data _null_; file 'sq.txt'; put ' x y'; do n=1 to 100; y = n**2; put n 3. y 6.; end; run; /* 用FILE的DLM=','和PUT语句生成CSV的平方数表 */ data _null_; file 'sq.csv' dlm=','; put 'x,y'; do n=1 to 100; y = n**2; put n y; end; run; /* 用PUT语句中加逗号生成CSV的平方数表,允许使用格式 */ data _null_; file 'sq.csv' dlm=','; put 'x,y'; do n=1 to 100; y = n**2; put n 4. ',' y 8.; end; run; /* 例:用FILE语句加DLM选项生成CSV文件。PUT语句中变量不能使用输出格式*/ data _NULL_; set samp.c9501; file 'c9501new.csv' dlm=','; if _n_=1 then put 'name,sex,math,chinese,avg'; put name sex math chinese avg; run; /* 例:计数循环 */ data; DO i = 1 TO 20 BY 2; j = i**3; put i 3. j 5.; END; run; data; DO i = 19 TO 1 BY -2; j = i**3; put i 3. j 5.; END; run; /* 例:列数循环 */ data; do mon='Jan', 'Feb', 'Mar'; put mon $4. 
' is spring.';
  end;
run;

/* Example: primality check with a DO WHILE loop (tries odd divisors 3, 5, 7, ...) */
/* NOTE(review): the IF statement in this step looks truncated - text between
   "if i" and "100 then" seems to have been lost upstream. It is kept exactly
   as found; confirm against the original source. */
data;
  x=1333333;
  *x=28338943;
  i=3;
  DO WHILE (mod(x,i) ^= 0);
    i=i+2;
  END;
  if i100 then chinese=100;
run;

/* Column subset with the KEEP statement */
data c9501b;
  set c9501;
  keep name avg;
run;

/* Column subset with the DROP statement */
data c9501b;
  set c9501;
  drop sex math chinese;
run;

/* Data set option on input */
data c9501b;
  set c9501(keep=name avg);
run;

/* Data set option on output */
data c9501b(keep=name avg);
  set samp.c9501;
run;

/* Split a data set by columns */
data a(keep=name sex) b(keep=name math chinese);
  set samp.c9501;
run;

/* Generate a data set with many rows and columns */
data huge;
  array x(10);
  do i=1 to 10000;
    do j=1 to 10;
      x(j) = normal(0);
    end;
    output;
  end;
  drop i j;
run;

/* Row and column subset through data set options */
data new;
  set huge(obs=100 keep=X1 X2);
run;

/* Random sampling: attach a uniform random key, sort by it, keep the first 100 rows */
data new2;
  set huge(keep=X1 X2);
  sortid=uniform(111);
run;
/* The sort key is SORTID, the variable created in the step above
   (it was misspelled SORTDID, which does not exist in NEW2). */
proc sort data=new2;
  by sortid;
run;
data new3;
  set new2(obs=100);
run;

/* Subsetting IF statement */
data c9501c;
  set c9501;
  IF math>=90 and chinese>=100;
run;

/* Row subset with the WHERE statement */
data c9501c;
  set c9501;
  WHERE math>=90 and chinese>=100;
run;

/* Row subset with the WHERE= data set option */
data c9501c;
  set c9501(WHERE= (math>=90 and chinese>=100));
run;

/* Split a data set by rows with WHERE= output options */
data c9501m(where=(sex='男'))
     c9501f(where=(sex='女'));
  set samp.c9501;
run;
proc print data=c9501m;run;
proc print data=c9501f;run;

/* Split a data set by rows with SET and OUTPUT */
data c9501m c9501f;
  set c9501;
  select(sex);
    when('男') output c9501m;
    when('女') output c9501f;
    otherwise put sex= '有错';
  end;
  drop sex;
run;
proc print data=c9501m;run;
proc print data=c9501f;run;

/* Inside an explicit loop, observations must be written with OUTPUT */
data sq;
  do i=1 to 10;
    j=i*i;
    output;
  end;
run;
proc print;run;

/* Read a CSV file with INFILE. Some fields are comma-separated vectors
   protected by double quotes; one of them holds comma-separated
   "date time" pairs. The raw data is in the sampled subdirectory. */
%LET infile='multiseries.csv';
%LET outfile='table.csv';
%LET DateOrigin='01OCT2011:00:00:00'dt;
data k;
  length iobs dt date time y1-y7 8;
  length dts $ 9999 y1s y3s y4s $ 9999;
  infile &infile missover dsd lrecl=64000 pad firstobs=2;
  input dts $ y1s $ y2 y3s $ y4s $ y5 y6 y7;
  iobs = _n_;
  j = 0;
  /* Walk through the j-th element of each vector field until DTS is exhausted */
  do while (1);
    j + 1;
    s = scan(dts, j, ',');
    if s = ' ' then leave;
    s1 = scan(s, 1, ' ');
    s2 = scan(s, 2, ' 
'); date = input(s1, yymmdd10.); time = input(s2, time8.); dt = dhms(date, hour(time), minute(time), second(time)); dt = dt - &DateOrigin; s = scan(y1s, j, ','); y1 = input(s, 16.); s = scan(y3s, j, ','); y3 = input(s, 16.); s = scan(y4s, j, ','); y4 = input(s, 16.); output; end; format date yymmdd10.; format time time8.; *format dt datetime19.; keep iobs dt date time y1-y7; run; PROC EXPORT DATA=WORK.K OUTFILE= &outfile DBMS=CSV REPLACE; RUN; /* SET语句的POINT=选项 */ data new; do i=1 to 19 by 2; set samp.class point=i; output; end; stop; run; proc print;run; /* SET语句的END=选项 */ data one; input x @@; cards; 1 2 3 4 5 ; data two; set one end=lastline; avg + x; if lastline then do; avg = avg / _n_; output; end; keep avg; run; proc print;run; data three; set one; if _n_=1 then set two; run; proc print;run; data ab; length id 8 drug $1 effect 8; set aa; effect=a; drug='A'; output; effect=b; drug='B'; output; keep id drug effect; run; proc print;run; data count; set samp.gpa END=lastline; IF lastline THEN do; n=_n_; OUTPUT; end; keep n; run; proc print;run; /* RETAIN语句 */ data aa; retain prod 1; input x @@; prod = prod * x; cards; 1 2 3 4 5 ; run; proc print;run; data ac; retain x1 0; input x @@; d = x - x1; x1 = x; cards; 1 2 5 9 16 ; run; proc print;run; /* RETAIN语句,只保留最后一行。 infile语句的eof=指定一个标签, 最后一行时运行跳到标签处。 如果是正常的外部文件就可以用END=指定一个指示变量。 */ data ad; retain prod 1; infile datalines eof=lastline; input x @@; prod = prod * x; delete; keep prod; lastline: output; return; cards; 1 2 3 4 5 ; run; proc print;run; /* SET语句应该不把变量置为缺失,此例是一个反例: SET只保证用SET语句读入的变量在下一轮隐含循环 不被置为缺失,但是用赋值定义的变量在下一轮隐含 循环中仍置为缺失。 */ data aa; input x @@; cards; 1 2 3 4 5 ; run; data ab; put _n_ 2. ' 1:' prod= x=; if _n_=1 then prod = 1; put _n_ 2. ' 2:' prod= x=; set aa; put _n_ 2. ' 3:' prod= x=; prod = prod * x; put _n_ 2. 
' 4:' prod= x=;
run;
proc print;run;

/* Vertical concatenation */
data class1;
  input id a;
cards;
1 11
2 12
;
data class2;
  input a id;
cards;
21 3
22 4
;
data class3;
  input id b;
cards;
5 91
6 92
;
run;
data classes;
  set class1 class2 class3;
run;

/* Vertical concatenation that records which input each row came from */
data new;
  set c9501m(in=male) c9501f(in=female);
  if male=1 then sex='男';
  if female=1 then sex='女';
run;

data new;
  length name $ 10 sex $ 8 math 8 chinese 8 avg 8;
  set samp.c9501m(IN=m) samp.c9501f(IN=f);
  IF m THEN sex='Male';
  IF f THEN sex='Female';
run;

/* NOTE(review): %p is invoked here but no macro named P is defined in the
   visible part of the file - confirm it exists or remove the call. */
%p

/* Side-by-side merge purely by row number (no BY statement) */
data c9501u(keep=name sex)
     c9501v(keep=math)
     c9501w(keep=chinese);
  set samp.c9501;
run;
data new;
  merge c9501u c9501v c9501w;
run;

/* One-to-one match-merge example */
data c9501x;
  set samp.c9501;
  keep name sex;
run;
data c9501y;
  set samp.c9501;
  keep name math chinese;
run;
data c9501x(keep=name sex)
     c9501y(keep=name math chinese);
  set samp.c9501;
run;
proc sort data=c9501x;
  by name;
run;
proc sort data=c9501y;
  by name;
run;
data new;
  merge c9501x c9501y;
  by name;
run;
proc print;run;

/* Handling unmatched rows: keep the unmatched rows */
data d1;
  input id $ x;
cards;
a 1
b 2
;
data d2;
  input id $ y;
cards;
a 21
c 23
;
/* Both inputs of a BY merge must be sorted; the second sort used to
   target D1 again and left D2 unsorted. */
proc sort data=d1;
  by id;
proc sort data=d2;
  by id;
data new;
  merge d1 d2;
  by id;
proc print;run;

/* Handling unmatched rows: keep only rows present in both inputs */
data new;
  merge d1(IN=ina) d2(IN=inb);
  by id;
  IF ina and inb;
proc print;run;

/* Handling unmatched rows: keep only rows present in the first input */
data new;
  merge d1(IN=ina) d2;
  by id;
  IF ina;
proc print;run;

/* One-to-many match-merge example */
data d1;
  input id $ x;
cards;
a 11
b 12
;
run;
data d2;
  input id $ y;
cards;
a 21
a 22
;
run;
data new;
  merge d1 d2;
  by id;
run;
proc print;run;

/* Another one-to-many merge example */
data aa;
  set samp.c9501;
  good = math>=85 AND chinese>=100;
run;
proc print;run;
data te;
  input good teacher $;
cards;
1 AAA
0 BBB
;
run;
proc sort data=aa;
  by good;
proc sort data=te;
  by good;
data ab;
  merge aa te;
  by good;
run;
proc print;run;

/* Many-to-many match-merge example */
data d3;
  input id $ x;
cards;
a 11
b 12
a 13
;
run;
data d4;
  input id $ y;
cards;
a 21
a 22
b 23
a 24
;
run;
proc sort data=d3;
  by id;
proc sort data=d4;
  by id;
data new;
merge d3 d4; by id; run; proc print;run; /* 用UPDATE语句更新数据集 */ data upd; input name $ sex $ chinese; cards; 张红艺 男 . 王思明 . 91 ; run; proc sort data=c9501; by name; run; proc sort data=upd; by name; run; data new; update c9501 upd; by name; run; proc print;run; /* 更新时对需要重新计算的列按需重新计算 */ data new; update c9501 upd(in=in_upd); if in_upd=1 then avg = math*0.5 + chinese/120*100*0.5; by name; run; proc print;run; **************************************************; **************************************************; **************************************************; **************************************************; **************************************************; /***************************/ /* §2.4 宏 */ /***************************/ /* 宏的典型例子 */ %MACRO SALREAD; %DO NP=1 %TO 10; %let ff="df&NP..txt"; %let fd=df&NP; data &fd; infile &ff; input date yymmdd10. sales; persid=&NP; run; %END; %LET setstm=SET; %DO NP=1 %TO 10; %LET setstm=&setstm df&NP; %END; %PUT &setstm; data whole; &setstm; run; %MEND SALREAD; %SALREAD; /******************************************/ /* 宏变量例子 */ /******************************************/ /* 自动宏变量例子 */ proc print data=samp.class noobs label; title 'Listing of the CLASS dataset'; footnote1 "Created &systime. &sysday, &sysdate9."; footnote2 "on the &sysscp. System using Release &sysver."; run; /* 自定义宏变量例子 */ %let var1= SAS Macro; %let var2 = "SAS Macro"; %let var3 = "SAS' Macro"; %let var4 = 3+4; %PUT &var1. &var2. &var3.; %PUT The value of var4 is &var4.; /* 宏PUT的特殊用法 */ %put _all_; %put ERROR: unexpected result.; %put WARNING: maybe wrong.; %put NOTE: some bad data?; /* 宏变量定义中使用宏替换的例子 */ /* 假设有数据文件file11.txt,其中包含日期和销售额数据 */ %LET i=11; %LET fname=file&i; %LET ff="&fname..txt"; %PUT &ff.; data &fname.; infile &ff.; input date yymmdd10. sales; persid=&i; run; /* 这是前面的宏展开后的效果 */ data file11; infile "file11.txt"; input date yymmdd10. 
sales; persid=11; run; /* 多个&引发两次扫描的例子 */ %LET varpre=sale; %LET k=95; %LET sale95=15.3; %PUT &sale95; %PUT &varpre&k; %PUT &&sale&k; %PUT &&varpre&k; %PUT &&&varpre&k; /* 自动宏变量例子 */ %PUT &sysdate &sysdate9 &sysday; %PUT &syssite &sysscp &sysscpl &sysuserid; /* 用宏变量避免重复的例子 */ %LET dsn=samp.class; title "Dataset &dsn"; proc contents data=&dsn.; run; proc print data=&dsn.(obs=10); run; title; /******************************************/ /* 宏例子 */ /******************************************/ /* 最简单的宏:只是替换一个语句中的一段 */ /* 原意 */ data new; set c9501f c9501m; run; /* 使用宏替换语句片段 */ %macro ds; c9501f c9501m %mend ds; data new2; set %ds; run; /* 使用宏替换片段, 包括分号在内 */ %macro dsc; c9501f c9501m; %mend dsc; data new3; set %dsc run; /* 使用宏替换一段, 宏定义中调用宏变量,宏变量可以在调用前才赋值 */ %macro dsv; &d1 &d2 %mend dsv; %let d1=c9501f; %let d2=c9501m; data new4; set %dsv; run; /* 使用宏替换一段,带有宏参数 */ %macro abc(d1,d2); &d1 &d2 %mend abc; data new5; set %abc(c9501f,c9501m); run; /* 简单的宏,无参数, 包含一段SAS程序 */ %MACRO dsinfo; title "Dataset &dsn"; proc contents data=&dsn.; run; proc print data=&dsn.(obs=10); run; title; %MEND dsinfo; %LET dsn=samp.class; %dsinfo /* 带有参数的宏 */ %MACRO dsinfov(dsn); title "Dataset &dsn"; proc contents data=&dsn.; run; proc print data=&dsn.(obs=10); run; title; %MEND dsinfov; %dsinfov(samp.class) /* 带有参数的宏:横向合并 */ %macro merge2(d1,d2,dout, byvar); proc sort data=&d1.; by &byvar.; proc sort data=&d2.; by &byvar.; data &dout.; merge &d1. 
&d2.; by &byvar.; run; %mend merge2; %merge2(c9501x, c9501y, new6, name) /* 带有缺省参数的宏 */ %MACRO dsinfod(dsn=&syslast, nobs=10); title "Dataset &dsn"; proc contents data=&dsn.; run; proc print data=&dsn.(obs=&nobs.); run; title; %MEND dsinfod; %dsinfod(dsn=samp.class, nobs=5) data; x=1;y=2; run; %dsinfod() %dsinfod(nobs=5, dsn=samp.class) /* 用宏屏蔽一段程序 */ data; x=1;y=2; run; %MACRO debug1; proc contents data=&syslast; run; proc print data=&syslast(obs=5); run; %MEND debug1; %*debug1; %LET DEBUG=*; %MACRO debug1; proc contents data=&syslast; run; proc print data=&syslast(obs=5); run; %MEND debug1; &DEBUG %debug1; /******************************************/ /* 宏的流程控制语句例子 */ /******************************************/ /* 宏IF选择程序片段 */ data aa; input x y; cards; 1 2 11 12 ; run; %MACRO myprog1(dsn=&syslast., neat=yes); proc print data=&dsn.(obs=10) %IF &neat=yes %THEN label noobs; %ELSE double; ; run; %MEND myprog1; options mprint; %myprog1(dsn=aa, neat=yes) %myprog1(dsn=aa, neat=no) /* 宏IF与复合语句 */ %MACRO myprog1b(dsn=&syslast., neat=yes); %IF &neat=yes %THEN %DO; proc print data=&dsn.(obs=10) label noobs; %END; %ELSE %DO; proc print data=&dsn.(obs=10) double; %END; run; %MEND myprog1b; %myprog1b(dsn=aa, neat=yes) %myprog1b(dsn=aa, neat=no) /* 宏IF选择程序段 */ %MACRO myprog2(dsn=&syslast., debug=); %IF &debug NE %THEN %DO; proc print data=&dsn.(obs=10); run; %END; proc means data=&dsn; var x y; run; %MEND myprog2; options mprint; %myprog2(dsn=aa, debug=true) %myprog2(dsn=aa) /* 最简单的宏循环 */ %MACRO mac; %DO i=1 %TO 10; %PUT &i.; %END; %MEND mac; %mac /* 用宏循环读入多个数据文件的例子。带有生成数据部分。 */ %MACRO gen(nfiles); %DO i=1 %TO &nfiles.; data _null_; file "test\df&i..txt"; x1 = (&i. - 1)*10 + 1; x2 = &i. * 10; do x=x1 to x2; y = x*x; put x 5. y 8.; end; run; %END; %MEND gen; %gen(5); %MACRO rd(nfiles); %DO i=1 %TO &nfiles.; data d&i.; infile "test\df&i..txt"; input x y; run; %END; %LET s=SET; %DO i=1 %TO &nfiles.; %LET s=&s. 
d&i.;
  %END;
  %PUT &s.;
  data dd;
    &s.;
  run;
%MEND rd;
%rd(5);

/* Another way to concatenate: generate the data set list inside the SET statement.
   The name on %MEND now matches the macro name (it used to read "slob"). */
%MACRO solb(nfiles);
  data dd;
    set
    %DO i=1 %TO &nfiles.;
      d&i.
    %END;
    ;
  run;
%MEND solb;

/* System options related to macro execution */
/* MPRINT shows the SAS code generated by a running macro */
options mprint;
data;
  x=11; y=12;
run;
%MACRO debug1;
  proc print data=&syslast;
  run;
%MEND debug1;
%debug1

/******************************************/
/* Macro quoting examples                 */
/******************************************/

/* Use %STR() to hold semicolons, unmatched quotes, parentheses and so on */
%LET myvar=%STR(a%');
%PUT &myvar;
%LET myvar=%STR(b%");
%PUT &myvar;
%LET myvar=%STR(log%(12);
%PUT &myvar;
%LET myvar=%STR(345%));
%PUT &myvar;
%LET myvar=%STR(90%%);
%PUT &myvar;
%LET myvar=%STR(90%%%');
%PUT &myvar;
%LET printit=%STR(proc print; run;);

/* Macro variables and macros are still resolved inside %STR() */
%LET var1=SAS Macro;
%LET var2=%STR(New &var1);
%PUT &var2;

/* %NRSTR() suppresses resolution of macro variables and macros */
%LET var1=SAS Macro;
%LET var2=%NRSTR(John%'s &var1);
%PUT &var2;
%PUT This is the result of %NRSTR(%NRSTR); /*'*/

/* %NRSTR() used to protect the default value of a macro parameter */
%MACRO credits(d=%NRSTR(Mary&Stacy&Joan Ltd.));
  footnote "Designed by &d";
%MEND credits;
%credits()

/* %BQUOTE protects the fully resolved result rather than the source text */
%LET var1='abc';
%LET var2=%BQUOTE(%SUBSTR(&var1,1,3));
%PUT &var2;
data _null_;
  call symputx('var', 'B&G Coorporated');
run;
%LET com=%NRBQUOTE(&var);
%PUT &com;

/* %UNQUOTE removes the protection */
%LET city=Beijing;
%LET oth=%nrstr(&city);
%LET unq=%unquote(&oth);
%put oth: &oth;
%put unq: &unq;

/******************************************/
/* Macro function examples                */
/******************************************/

/* Character macro functions. %LENGTH returns the length.
   Note that the message text needs no surrounding quotes. */
%LET var1=Macro;
%LET VL=%LENGTH(&var1.);
%PUT &var1. 
长度为 &vl.; %LET var2=; %LET VL2=%LENGTH(&var2.); %PUT 宏变量var2长度为 &vl2.; /* %SUBSTR函数*/ %LET ss=A brown fox; %LET s1=%SUBSTR(&ss, 3, 5); %PUT &s1; /* %INDEX函数*/ %LET ss=A brown fox; %LET ii=%INDEX(%BQUOTE(&ss), ow); %PUT ow 在 &ss.的第&ii.位置出现; /* %SCAN, 不指定分隔符*/ %LET var1=NO.1 student, excellent!; %LET x1=%SCAN(%BQUOTE(&var1),1); %LET x3=%SCAN(%BQUOTE(&var1),3); %LET x4=%SCAN(%BQUOTE(&var1),4); %LET xe=%SCAN(%BQUOTE(&var1),99); %PUT &x1 &x3 &x4 &xe; /* %SCAN, 用%STR保护分隔符, 用%BQUOTE保护自变量值*/ %LET var1=NO.1 student, excellent!; %LET x = %SCAN(%BQUOTE(&var1), 2, %STR( )); %PUT &var1.中第二项为&x.; /* 用%QSCAN和循环读取列表中每一个*/ %MACRO exscan(source); %LET i=1; %LET x=%QSCAN(%BQUOTE(&source), &i, %STR( )); %DO %WHILE( &x ^= %STR()); %PUT Item.&i &x; %LET i=%EVAL(&i+1); %LET x = %QSCAN(%BQUOTE(&source), &i, %STR( )); %END; %MEND exscan; %LET var1=NO.1 student, excellent!; %exscan(%BQUOTE(&var1)) /* 在宏IF和宏循环条件中数值表达式自动计算。*/ %macro impcomp; %let x = 1+2; %IF &x=3 %THEN %put 自动计算表达式.; %ELSE %put 没有计算; %mend impcomp; %impcomp /* 在宏IF和宏循环条件中数值表达式自动计算不能用实数。*/ %macro prb; %IF 10.0>9.0 %THEN %put 程序正确; %ELSE %put 不能进行非整数比较!; %mend prb; %prb /* prb的改正 */ %macro prbc; %IF %sysevalf(10.0>9.0) %THEN %put 程序正确; %ELSE %put 不能进行非整数比较!; %mend prbc; %prbc /* 数值表达式计算。%EVAL只能进行整数类型的计算。 */ %LET y1=2; %LET y2 = 2+1; %LET y3=%EVAL(&y2 + 1); %LET y4=%SYSEVALF(4.8/2); %PUT &y1 &y2 &y3 &y4; /* 用%SYSFUNC调用数据步函数 */ %LET x=3.14159/2; %LET y=%SYSFUNC(sin(&x), 6.2); %PUT &y; /* 用%SYSFUNC可以调用PUTN,不能用PUT */ %LET s=%SYSFUNC(putn(1,Z3.)); %PUT &s; /* 用%SYSFUNC和PUTN转换日期格式 */ %LET s=%SYSFUNC(putn("&sysdate9"d, yymmdd10.)); %PUT &sysdate9 &s; footnote "演示结果:%SYSFUNC(putn("&sysdate9"d, yymmdd10.))"; /*************************************/ /* 自定义宏函数 */ /*************************************/ /* 用宏变量传递结果 */ %macro dsexist(dsn); %global exist; %if &dsn ne %then %do; data _null_; stop; set &dsn; run; %end; %if &syserr=0 %then %let exist=1; %else %let exist=0; %mend dsexist; %dsexist(sasuser.mydat) data new; if &exist then 
dsname='sasuser.mydat'; run; /* 宏函数exist判断数据集是否存在 */ %macro exist(dsn); %sysfunc(exist(&dsn)) %mend exist; %macro test(dsn); %if %exist(&dsn) %then %put &dsn.数据集存在; %else %put &dsn.数据集不存在; %mend test; %test(samp.class) %test(samp.notexist) /* 宏函数currdate返回年月日格式的当前日期 */ %macro currdate; %qsysfunc(date(), yymmdd10.) %mend currdate; title 'C9501数据集列表'; footnote "演示日期: %currdate"; proc print data=samp.c9501;run; /*************************************/ /* 宏与数据步的信息交换 */ /*************************************/ /* SYMPUTX 的例子 */ %MACRO spe1; data books; set samp.c9501bk end=lastobs; ta + amount; if lastobs then do; call symputx('nobs', _n_); call symputx('total', ta); end; drop ta; run; footnote "&nobs.个人共花费&total.元"; proc print;run; footnote; %MEND spe1; %spe1 /* SYMPUTX 的例子 */ data aa; input cid city $; cards; 110 北京 230 上海 ; run; %MACRO spe2; data _null_; set aa; call symputx('city' || put(cid,best3.), city); run; %put &city110 &city230; %MEND spe2; %spe2 /* 数据步中静态使用宏变量的例子 */ %LET nmax=5; DATA aa(drop=i j); array x(&nmax); do i=1 to 20; do j=1 to &nmax; x(j) = normal(112233); end; output; end; run; /* 数据步中静态使用宏变量的例子: 赋值 */ %LET nobs=5; DATA ab; n = &nobs; do i=1 to n; j = i*i; output; end; run; /* 数据步中静态使用宏变量的例子: RETAIN */ %LET nobs=5; DATA ac; retain n &nobs; do i=1 to n; j = i*i; output; end; run; /* SYMGET 的例子 */ data bb; input cid temp; cards; 110 20 230 30 ; %MACRO spe3; data bc; set bb; city = symget('city' || put(cid, best3.)); run; proc print;run; %MEND spe3; %spe3 /* 用数据集控制宏的运行 */ data cntr; length dsname $ 20; input dsname $ nobs; cards; samp.class 5 ; run; %MACRO controled; data _null_; set cntr; call symputx('dsn', dsname); call symputx('nobs', nobs); run; title "Dataset &dsn"; proc contents data=&dsn.; run; proc print data=&dsn.(obs=&nobs.); run; title; %MEND controled; %controled /* 用多行数据集控制多次运行 */ data cntr; length dsname $ 20; input dsname $ nobs; cards; samp.class 5 sashelp.air 8 ; run; %MACRO controled; data _null_; set cntr end=lastobs; call 
symputx('dsn' || trim(left(put(_n_, best12.))), dsname); call symputx('nobs' || trim(left(put(_n_, best12.))), nobs); if lastobs then call symputx('npars', _n_); run; %DO ipar=1 %TO &npars; title "Dataset &&dsn&ipar"; proc contents data=&&dsn&ipar; run; proc print data=&&dsn&ipar(obs=&&nobs&ipar); run; %END; title; %MEND controled; options symbolgen mprint; %controled /* 控制数据集中同时保存宏变量名和宏变量值 */ data cntr; length varname varval $ 40; input varname $ varval $; cards; dsn samp.class ; run; %macro controled; data _null_; set cntr; call symputx(varname, varval); call symputx('varname', varname); run; %PUT varname is &varname; %PUT &varname stores &&&varname; %mend controled; %controled /* 控制数据集中同时保存多组宏变量名和宏变量值 */ /* 此例的结果还不理想,不论是&&&vars&i还是&&&&vars&i都不能返回samp.class */ data cntr; length varname varval $ 40; input varname $ varval $; cards; dsn samp.class nobs 5 ; run; %macro controled; data _null_; set cntr end=lastobs; call symputx(varname, varval); call symputx('vars' || trim(left(put(_n_, 8.))), varname); if lastobs then call symputx('nvars', _n_); run; %DO i=1 %TO &nvars; %PUT vars&i is &&vars&i; %LET tmpname=&&vars&i; %PUT &&vars&i stores &&&tmpname; %END; %mend controled; %controled /* SELECT INTO生成宏变量的例子 */ proc sql noprint; select count(*), sum(amount) format=best5. into :nstudent, :total from samp.c9501bk; quit; %put &nstudent. &total.; /* SELECT INTO生成宏变量数组的例子 */ proc sql noprint; select name, amount into :name1-:name9999, :amount1-:amount9999 from samp.c9501bk; %let nstudents=&sqlobs; quit; %put &nstudents; %put &name3 &amount3; /* SELECT INTO生成空格分隔值列表宏变量 */ proc sql noprint; select name, amount INTO :names SEPARATED BY ' ', :amounts SEPARATED BY ' ' FROM samp.c9501bk; quit; %put &names --- &amounts; /* 利用宏变量数组 */ %macro ma; %do ii=1 %to &nstudents.; %put 第&ii.个学生是:&&name&ii.; %end; %mend ma; %ma /* 利用空格分隔值列表宏变量 */ %macro vl(vars); %do ivar=1 %to 100000; %let val=%scan(&vars., &ivar.); %if &val.= %then %goto endd; %put NO.&ivar. 
&val;
  %end;
  %endd: %mend;
%vl(&names)

/* Macro "array" example using && and &&&.
   The assignment to CV reads variable V<ii>; the "&ii;" at its end had been
   collapsed into a single garbled character, which also removed the
   terminating semicolon. */
%macro test;
  %LET v1=x;
  %LET v2=y;
  %LET v3=z;
  %LET nv=3;
  %LET x=11;
  %LET y=12;
  %LET z=13;
  %DO ii=1 %TO &nv.;
    %LET cv=&&v&ii;
    %PUT &&&cv;
  %END;
%mend test;
%test

/******************************************/
/* Macro application examples             */
/******************************************/

/* Application: use a macro loop to run several procedures per group.
 * For the class data set, grouped by sex, compute simple statistics
 * with PROC MEANS and PROC TABULATE for each group.
 * With a BY statement each procedure would print both groups together;
 * here the two analyses of one group are kept next to each other.
 */
%macro exa;
  %let vname=sex;
  %let dsn=samp.class;
  %let vars=height weight;
  %let outrtf=testout.rtf;
  proc sql noprint;
    select distinct &vname.
      into :disv1-:disv999
      from &dsn.;
  quit;
  %let ndisv=&sqlobs.;
  ods rtf file="&outrtf." startpage=no bodytitle;
  ods noproctitle;
  %do i=1 %to &ndisv.;
    title "分组:&vname.=&&disv&i";
    proc means data=&dsn mean std;
      where &vname="&&disv&i";
      var &vars.;
    run;
    title;
    proc tabulate data=&dsn;
      where &vname="&&disv&i";
      var &vars.;
      table &vars., mean std;
    run;
  %end;
  ods rtf close;
%mend exa;
%exa

/* Application: split a data set by the values of a grouping variable.
 * Creates one data set subd<i> per distinct value of the variable
 * (subd1 and subd2 for the two sexes in the class data).
 */
%macro exa;
  %let dsn=samp.class;
  %let vname=sex;
  proc sql noprint;
    select distinct &vname.
      into :disv1-:disv999
      from &dsn.;
  quit;
  %let ndisv=&sqlobs.;
  data
    %do i=1 %to &ndisv.;
      subd&i
    %end;;
    set &dsn;
    %do i=1 %to &ndisv.;
      %if &i>1 %then else;
      if &vname="&&disv&i" then output subd&i;
    %end;
  run;
%mend exa;
%exa

/* Read every file named in an index file.
   The output data set is NEW<ii>; the "&ii;" after NEW had been collapsed
   into a single garbled character, leaving the DATA statement unterminated. */
%macro readb(indexf);
  data index;
    length fname $ 20;
    infile "&indexf";
    input fname $;
  run;
  proc print;run;
  data _null_;
    set index end=lastline;
    if lastline then call symputx('nfiles', _n_);
  run;
  %DO ii=1 %TO &nfiles;
    data _null_;
      set index(firstobs=&ii obs=&ii);
      call symputx('fname', fname);
    run;
    data new&ii;
      infile "test\&fname";
      input x y;
    run;
  %END;
%mend readb;
%readb(test\flist.txt)

**************************************************;
**************************************************;
**************************************************;
**************************************************;
**************************************************;
/******************************************/
/* PROC SQL examples                      */
/******************************************/

/* PROC SQL example: all rows and columns */
proc sql;
  select * from samp.c9501;
quit;

/* PROC SQL example: column subset */
proc sql;
  select name, math
    from samp.c9501;
quit;

/* PROC SQL example: row subset */
proc sql;
  select name, math
    from samp.c9501
    where sex='男';
quit;

/* PROC SQL example: DISTINCT keyword */
proc sql;
  select DISTINCT sex
    from samp.c9501;
quit;

/* PROC SQL example: computed column */
proc sql;
  select name, math+chinese AS total
    from samp.c9501;
quit;
proc sql;
  select name, math+chinese AS total
    from samp.c9501
    where CALCULATED total>=200;
quit;

/* PROC SQL example: ORDER BY clause */
proc sql;
  select name, math
    from samp.c9501
    where sex='男'
    ORDER BY math DESC;
quit;

/* PROC SQL example: grouped summary with GROUP BY */
proc sql;
  select sex, mean(math) as mm
    from samp.c9501
    GROUP BY sex;
quit;

/* PROC SQL example: select groups with GROUP BY and HAVING */
proc sql;
  select sex, mean(math) as mm
    from samp.c9501
    GROUP BY sex
    HAVING mean(chinese)>=100;
quit;
proc sql;
  select sex, mean(math) as mm
    from c9501
    GROUP BY sex
    HAVING mm>=90;
quit;

/* PROC SQL example: save the query result to a data set */
proc sql;
CREATE TABLE subd AS select name, math from samp.c9501 where sex='男' order by math desc; select * from subd; quit; /* PROC SQL 例子: 查询结果保存到视图 */ proc sql; CREATE VIEW totd AS select name, math+chinese AS total from samp.c9501; quit; proc print data=totd;run; /* 视图的例子 */ data orig; input name $ x1 x2; cards; A 1 2 B 3 4 ; run; proc sql; create view oview as select name, x1+x2 as tot from orig; quit; proc print data=oview;run; data orig; input name $ x1 x2; cards; A -1 -2 B -3 -4 ; run; proc print data=oview;run; /* PROC SQL 例子: 一对一连接 */ proc sql; create table c9501x AS select name, sex from samp.c9501; create table c9501y AS select name, math, chinese from samp.c9501; quit; proc sql; select a.name, math from c9501x AS a, c9501y AS b where a.name=b.name and sex='男' order by math desc; quit; /* 用数据步做一对一连接且只保留匹配行 */ proc sort data=c9501x; by name; proc sort data=c9501y; by name; data new; merge c9501x(IN=_a) c9501y(IN=_b); by name; IF _a AND _b; run; /* PROC SQL 例子: 一对多连接 */ data d1; input id $ x; cards; a 11 b 12 ; data d2; input id $ y @@; cards; a 21 a 22 ; proc sql; select a.id, x, y from d1 AS a, d2 AS b where a.id=b.id; quit; /* PROC SQL 例子: 笛卡儿积式连接 */ data d1; input x @@; cards; 11 12 ; data d2; input y @@; cards; 21 22 ; proc sql; select x, y from d1, d2; quit; /* PROC SQL 例子: 多对多连接 */ data d3; input id $ x; cards; a 11 b 12 a 13 ; run; data d4; input id $ y; cards; a 21 a 22 b 23 a 24 ; run; proc sql; select a.id, x, y from d3 AS a, d4 AS b where a.id=b.id; quit; /* 用数据步做多对多连接。a组只有3行结果 */ proc sort data=d3; by id; proc sort data=d4; by id; data new; merge d3 d4; by id; run; proc print;run; /* PROC SQL 例子: 一对一连接不匹配丢弃 */ data d5; input id $ x; cards; a 11 b 12 ; data d6; input id $ y; cards; a 21 c 23 ; run; proc sql; select d5.id, x, y from d5, d6 where d5.id=d6.id; quit; /* PROC SQL 例子: LEFT JOIN */ proc sql; select a.id, x, y from d5 AS a LEFT JOIN d6 AS b ON a.id=b.id; quit; /* PROC SQL 例子: RIGHT JOIN */ proc sql; select d5.id, x, y from d5 RIGHT JOIN d6 ON 
d5.id=d6.id;
quit;

/* PROC SQL example: FULL OUTER JOIN */
proc sql;
  select d5.id, x, y
    from d5 FULL OUTER JOIN d6
    ON d5.id=d6.id;
quit;
proc sql;
  select COALESCE(d5.id, d6.id) AS id, x, y
    from d5 FULL OUTER JOIN d6
    ON d5.id=d6.id;
quit;

/* IN condition */
proc sql;
  select *
    from samp.class
    where age IN (15,16);
quit;

/* IN condition with a subquery */
proc sql;
  select name, math
    from c9501 AS a
    where sex IN
      (SELECT sex
         from c9501 AS b
         group by sex
         having mean(chinese)>=100);
quit;
proc sql;
  select *
    from samp.class
    where age IN (
      select age
        from samp.class
        group by age
        having mean(height)>=65
    ) ;
quit;

/* Non one-to-one side-by-side merge with PROC SQL */
PROC SQL;
  SELECT *
    FROM (SELECT *, math>=85 AND chinese>=100 AS good
            FROM samp.c9501) AS a,
         te
    WHERE a.good=te.good;
QUIT;

/* Find people who share a birthday with PROC SQL */
/* NAME is read from columns 1-8, so each record below pads the name to
   byte column 8 before the date.
   NOTE(review): the padding assumes a double-byte encoding (two bytes per
   Chinese character) - confirm against the session encoding. */
title '找出生日相同的人: PROC SQL';
data cb;
  input name $ 1-8 birth :yymmdd10.;
  format birth yymmdd10.;
  label name='姓名' birth='生日';
cards;
李明     78-6-1
王思明   78-5-19
张聪     78-6-1
刘颖     78-10-18
张红艺   79-5-19
;
run;
proc sql;
  select name, birth
    from cb a
    where birth in (
      select birth
        from cb b
        where b.name ^= a.name)
    order by a.birth;
run;quit;

/* The same result through a self join */
proc sql;
  select a.name, a.birth
    from cb AS a, cb AS b
    where a.name ^= b.name and a.birth=b.birth
    order by a.birth;
run;quit;

/* Save the query result to a data set with CREATE TABLE.
   The subquery after IN must be enclosed in parentheses (they were missing). */
proc sql;
  CREATE TABLE bsame AS
    select name, birth
      from cb a
      where birth in (
        select birth
          from cb b
          where b.name ^= a.name)
      order by a.birth;
run;quit;
proc print data=bsame label;
  id name;
  by birth;
run;

/* Same month and day counts as the same birthday; computed in WHERE */
proc sql;
  select a.name, a.birth
    from cb AS a, cb AS b
    where a.name ^= b.name
      and month(a.birth)=month(b.birth)
      and day(a.birth)=day(b.birth)
    order by month(a.birth), day(a.birth);
run;quit;

/* Same month and day counts as the same birthday; uses CREATE VIEW and DROP VIEW */
proc sql;
  create view datamd as
    select name, birth, month(birth)*100+day(birth) AS md
      from cb;
  select name, birth
    from datamd a
    where md in (
      select md
        from datamd b
        where b.name ^= a.name)
    order by a.md;
  drop view datamd;
run;quit;

/* Find people who share a birthday with a DATA step and PROC FREQ */
title '找出生日相同的人: PROC FREQ';
proc freq data=cb noprint; tables birth / out=bfreq; run; proc sort data=cb; by birth; proc sort data=bfreq; by birth; data bsame; merge cb bfreq; by birth; IF count>1; run; proc print data=bsame label noobs; var name; by birth; id birth; run; proc datasets library=work nolist; delete bfreq bsame; run; /* 完全用数据步找同生日, 用RETAIN语句。 对三个人同生日会有重复输出。 */ title '找出生日相同的人: 数据步'; proc sort data=cb; by birth; run; proc print;run; data b; length name1 $12; set cb; retain birth1 name1; /* 用birth1, name1保留上一行观测 */ IF birth=birth1 THEN DO; name2=name; birth2=birth; name=name1; birth=birth1; output; name=name2; birth=birth2; output; END; name1=name; birth1=birth; keep name birth; run; proc print; run; /* 完全用数据步找同生日, 用BY组的LAST.和FIRST.功能 */ proc sort data=cb; by birth; run; data c; set cb; by birth; if first.birth and last.birth then delete; run; proc print;run; /* 工资单例子: 职工号(IdNumber)、性别(Sex)、工作类型代码(Jobcode)、 * 工资(Salary)、生日(Birth)、入职日期(Hired)。 * 可用于SQL。 */ data payroll; input IdNumber $ 1-4 Sex $ 6 Jobcode $ 8-10 Salary 12-16 @18 Birth date7. @26 Hired date7.; format birth hired mmddyy8.; datalines; 1009 M TA1 28880 02MAR59 26MAR92 1017 M TA3 40858 28DEC57 16OCT81 1036 F TA3 39392 19MAY65 23OCT84 1037 F TA1 28558 10APR64 13SEP92 1038 F TA1 26533 09NOV69 23NOV91 1050 M ME2 35167 14JUL63 24AUG86 1065 M ME2 35090 26JAN44 07JAN87 1076 M PT1 66558 14OCT55 03OCT91 1094 M FA1 22268 02APR70 17APR91 1100 M BCK 25004 01DEC60 07MAY88 ; run; /* 用PROC SQL查询,分组计算平均*/ proc sql; select Jobcode, count(jobcode) as number label='Number', avg(int((today()-birth)/365.25)) as avgage format=2. label='Average Age', avg(salary) as avgsal format=dollar8. label='Average Salary' from payroll group by jobcode having avgage ge 30; title1 'Summary Information for Each Job Category'; title2 'Average Age 30 or Over'; quit; /* 用CREATE TABLE保存查询结果到数据集 */ proc sql; create table bonus as select IdNumber, Salary format=dollar8., salary*.025 as Bonus format=dollar8. 
from payroll;
  title 'BONUS Information';
  select *
    from bonus;
quit;
title;

/* OilProd: crude oil production - Country and BarrelsPerDay.
   Country occupies columns 1-24; the number starts in column 26. */
data OilProd;
  input Country $ 1-24 @26 BarrelsPerDay comma9.;
  format barrelsperday comma9.;
  datalines;
Algeria                  1,400,000
Canada                   2,500,000
China                    3,000,000
Egypt                    900,000
Indonesia                1,500,000
Iran                     4,000,000
Iraq                     600,000
Kuwait                   2,500,000
Libya                    1,500,000
Mexico                   3,400,000
Nigeria                  2,000,000
Norway                   3,500,000
Oman                     900,000
Saudi Arabia             9,000,000
United States of America 8,000,000
United Arab Emirates     2,000,000
United Kingdom           3,000,000
Venezuela                3,000,000
USSR (former)            7,000,000
;
run;

/* OilRsrvs: crude oil reserves - Country and Barrels.
   NOTE(review): 'USSR (Former)' here and 'USSR (former)' in OilProd differ
   in case, so the equality join below presumably does not pair them -
   confirm whether that is intended. */
data OilRsrvs;
  input Country $ 1-24 @26 Barrels comma15.;
  format barrels comma15.;
  datalines;
Algeria                  9,200,000,000
Canada                   7,000,000,000
China                    25,000,000,000
Egypt                    4,000,000,000
Gabon                    1,000,000,000
Indonesia                5,000,000,000
Iran                     90,000,000,000
Iraq                     110,000,000,000
Kuwait                   95,000,000,000
Libya                    30,000,000,000
Mexico                   50,000,000,000
Nigeria                  16,000,000,000
Norway                   11,000,000,000
Saudi Arabia             260,000,000,000
United Arab Emirates     100,000,000
United Kingdom           4,500,000,000
United States of America 30,000,000,000
Venezuela                65,000,000,000
USSR (Former)            65,500,000,000
;
run;

/* INNER JOIN example: countries present in both tables, highest
   production first */
proc sql;
  title 'Oil Production/Reserves of Countries';
  select p.country,
         barrelsperday 'Production',
         barrels 'Reserves'
    from oilprod p, oilrsrvs r
    where p.country = r.country
    order by barrelsperday desc;
quit;

/* WorldCityCoords: coordinates of major world cities. Variables:
 * City (city name), Country, Latitude, Longitude.
*/ data worldcitycoords; input City $ 1-25 Country $ 28-46 Latitude 48-50 Longitude 53-56; datalines; Kabul Afghanistan 35 69 Algiers Algeria 37 3 Buenos Aires Argentina -34 -59 Cordoba Argentina -31 -64 Tucuman Argentina -27 -65 Adelaide Australia -35 138 Alice Springs Australia -24 134 Brisbane Australia -27 153 Darwin Australia -12 131 Melbourne Australia -38 145 Perth Australia -32 116 Sydney Australia -34 151 Vienna Austria 48 16 Nassau Bahamas 26 -77 Chittagong Bangladesh 22 92 Brussels Belgium 51 4 Belize Belize 17 -88 Kindley AFB Bermuda 33 -65 La Paz Bolivia -16 -69 Belem Brazil -1 -48 Belo Horizonte Brazil -20 -44 Brasilia Brazil -16 -48 Curitiba Brazil -25 -49 Fortaleza Brazil -4 -38 Porto Alegre Brazil -30 -51 Recife Brazil -9 -35 Rio de Janeiro Brazil -23 -43 Salvador Brazil -13 -38 Sao Paulo Brazil -23 -46 Sofia Bulgaria 43 23 Phnom Penh Cambodia 11 105 Calgary Canada 51 -114 Havre Canada 48 -110 Kingston Canada 44 -76 London Canada 43 -81 Moose Jaw Canada 50 -105 Montreal Canada 45 -73 Ottawa Canada 45 -76 Port Arthur Canada 48 -89 Quebec Canada 47 -71 St. 
John Canada 45 -66 Toronto Canada 44 -79 Victoria Canada 48 -123 Winnipeg Canada 50 -98 Punta Arenas Chile -53 -71 Santiago Chile -33 -71 Valparaiso Chile -33 -71 Chongquing China 29 106 Shanghai China 31 121 Baranquilla Colombia 11 -75 Bogota Colombia 4 -75 Cali Colombia 3 -76 Medellin Colombia 6 -75 Brazzaville Congo -4 15 Guantanamo Bay Cuba 20 -76 Havana Cuba 24 -82 Prague Czech Republic 51 14 Copenhagen Denmark 56 12 Santo Domingo Dominican Republic 18 -70 Cairo Egypt 30 31 San Salvador El Salvador 14 -89 Guayaquil Ecuador -21 -80 Quito Ecuador 0 -78 Addis Ababa Ethiopia 9 39 Asmara Ethiopia 15 39 Helsinki Finland 60 25 Lyon France 46 5 Marseilles France 43 5 Nantes France 47 -1 Nice France 44 7 Paris France 49 2 Strasbourg France 48 8 Cayenne French Guiana 5 -52 Berlin Germany 52 13 Hamburg Germany 53 10 Hannover Germany 52 10 Mannheim Germany 49 8 Munich Germany 49 11 Accra Ghana 5 0 Gibraltar Gibraltar 37 -5 Athens Greece 38 24 Thessaloniki Greece 40 23 Guatemala City Guatemala 14 -90 Georgetown Guyana 7 -58 Port Au Prince Haiti 18 -72 Tegucigalpa Honduras 15 -87 Hong Kong Hong Kong 22 114 Budapest Hungary 47 19 Reykjavik Iceland 65 22 Ahmenabad India 22 72 Bangalore India 13 77 Bombay India 19 73 Calcutta India 22 88 Madras India 14 80 Nagpur India 22 80 New Delhi India 28 77 Djakarta Indonesia -6 107 Kupang Indonesia -10 123 Makassar Indonesia -6 119 Medan Indonesia 3 99 Palembang Indonesia -3 105 Surabaya Indonesia -7 113 Abadan Iran 30 48 Meshed Iran 36 59 Tehran Iran 36 51 Baghdad Iraq 33 44 Mosul Iraq 36 44 Dublin Ireland 53 -6 Shannon Ireland 53 -9 Jerusalem Israel 32 35 Tel Aviv Israel 33 35 Milan Italy 45 9 Naples Italy 41 14 Rome Italy 42 12 Fukuoka Japan 33 130 Sapporo Japan 44 141 Tokyo Japan 36 140 Amman Jordan 32 36 Nairobi Kenya -1 37 Pyongyang Korea, North 39 126 Seoul Korea, South 37 127 Beirut Lebanon 34 35 Monrovia Liberia 6 -11 Benghazi Libya 33 21 Tananarive Madagascar -19 47 Kuala Lumpur Malaysia 4 102 Penang Malaysia 5 100 Guadalajara 
Mexico 21 -103 Merida Mexico 21 -89 Mexico City Mexico 19 -99 Monterrey Mexico 26 -100 Vera Cruz Mexico 19 -97 Casablanca Morocco 33 -7 Katmandu Nepal 28 85 Amsterdam Netherlands 52 5 Auckland New Zealand -37 175 Christchurch New Zealand -43 172 Wellington New Zealand -41 175 Managua Nicaragua 12 -86 Lagos Nigeria 6 3 Bergen Norway 60 5 Oslo Norway 60 11 Karachi Pakistan 25 67 Lahore Pakistan 31 74 Peshwar Pakistan 34 71 Panama City Panama 9 -79 Port Moresby Papua New Guinea -9 148 Ascuncion Paraguay -25 -57 Lima Peru -13 -77 Manila Philippines 14 121 Krakow Poland 51 20 Warsaw Poland 52 21 Lisbon Portugal 39 -10 San Juan Puerto Rico 18 -67 Bucharest Romania 44 27 Kiev Russia 50 30 Leningrad Russia 60 30 Minsk Russia 54 27 Moscow Russia 56 38 Odessa Russia 46 31 Tashkent Russia 41 69 Tbilisi Russia 42 45 Vladivostok Russia 44 132 Volgograd Russia 49 44 Dhahran Saudi Arabia 26 51 Jedda Saudi Arabia 21 39 Riyadh Saudi Arabia 24 47 Dakar Senegal 15 -17 Singapore Singapore 1 104 Mogadiscio Somalia 2 49 Cape Town South Africa -34 18 Johannesburg South Africa -26 28 Pretoria South Africa -26 28 Aden Yemen 13 45 Barcelona Spain 41 3 Madrid Spain 40 -4 Valencia Spain 39 0 Colombo Sri Lanka 7 80 Khartoum Sudan 15 32 Paramaribo Suriname 6 -56 Stockholm Sweden 59 19 Zurich Switzerland 47 8 Damascus Syria 33 36 Tainan Taiwan 23 120 Taipei Taiwan 25 121 Dar es Salaam Tanzania -7 39 Bangkok Thailand 14 100 Port of Spain Trinidad and Tobago 11 -61 Tunis Tunisia 37 10 Adana Turkey 37 35 Ankara Turkey 40 33 Istanbul Turkey 41 29 Izmir Turkey 38 27 Belfast Northern Ireland 54 -6 Birmingham England 52 -2 Cardiff Wales 51 -3 Edinburgh Scotland 56 -3 Glasgow Scotland 56 -4 London England 51 0 Montevideo Uruguay -35 -56 Caracas Venezuela 10 -67 Maracaibo Venezuela 10 -71 Da Nang Vietnam 17 108 Hanoi Vietnam 21 106 Ho Chi Minh City (Saigon) Vietnam 11 107 Belgrade Yugoslavia 45 20 Acapulco Mexico 17 -100 Beijing China 40 116 San Jose Costa Rica 10 -85 Hamilton Bermuda 32 -65 Vancouver 
Canada 49 -124 Kingston Jamaica 18 -77 ; run; /* Countries数据集: 各国家基本情况数据。 * 包括Name(国家名称)、Capital(首都)、Population(人口数)、 * Area(面积)、Continent(所在洲)、UNDate(加入联合国年)。 */ data Countries; input Name $ 1-35 Capital $ 37-55 Population 57-65 Area 67-75 Continent $ 77-107 UNDate 109-112; datalines; Afghanistan Kabul 17070323 251825 Asia 1946 Albania Tirane 3407400 11100 Europe 1955 Algeria Algiers 28171132 919595 Africa 1962 Andorra Andorra la Vella 64634 200 Europe 1993 Angola Luanda 9901050 481300 Africa 1976 Antigua and Barbuda St. John's 65644 171 Central America and Caribbean 1981 Argentina Buenos Aires 34248705 1073518 South America 1945 Armenia Yerevan 3556864 11500 Asia 1992 Australia Canberra 18255944 2966200 Australia 1945 Austria Vienna 8033746 32400 Europe 1955 Azerbaijan Baku 7760064 33400 Asia 1992 Bahamas Nassau 275703 5400 Central America and Caribbean 1973 Bahrain Manama 591800 300 Asia 1971 Bangladesh Dhaka 1.2639E8 57300 Asia 1974 Barbados Bridgetown 258534 200 Central America and Caribbean 1966 Belarus Minsk 10508000 80100 Europe 1945 Belgium Brussels 10162614 11800 Europe 1945 Belize Belmopan 211069 8900 Central America and Caribbean 1981 Benin Porto Novo 5394881 43500 Africa 1960 Bermuda Hamilton 60594 100 . Bhutan Thimphu 1756214 18100 Asia 1971 Bolivia La Paz 7795410 424200 South America 1945 Bosnia and Herzegovina Sarajevo 4697040 19700 Europe 1992 Botswana Gaborone 1372453 224600 Africa 1966 Brazil Brasilia 1.6031E8 3286500 South America 1945 Brunei Bandar Seri Begawan 287822 2200 Asia 1984 Bulgaria Sofia 8887111 42900 Europe 1955 Burkina Faso Ouagodougou 10235326 105900 Africa 1960 Burundi Bujumbura 6185632 10700 Africa 1962 Cambodia Phnom Penh 10366614 70200 Asia 1955 Cameroon Yaounde 13261994 183600 Africa 1960 Canada Ottawa 28392302 3849674 North America 1945 Cape Verde Praia 427188 1600 Africa . Cayman Islands Georgetown 23228 100 Central America and Caribbean . 
Central African Republic Bangui 3173103 240300 Africa 1960 Chad N'Djamena 5521118 495800 Africa 1960 Channel Islands 146436 100 Europe . Chile Santiago 14089101 292100 South America 1945 China Beijing 1.2022E9 3696100 Asia 1945 Colombia Bogota 35930188 440800 South America 1945 Comoros Moroni 535246 700 Africa 1975 Congo Brazzaville 2471223 132000 Africa 1960 Congo, Democratic Republic of Kinshasa 43106529 905400 Africa 1960 Costa Rica San Jose 3375083 19700 Central America and Caribbean 1945 Cote D'Ivoire Yamoussoukro 14437516 124500 Africa 1960 Croatia Zagreb 4744505 21800 Europe 1992 Cuba Havana 11173523 42800 Central America and Caribbean 1945 Cyprus Nicosia 737226 3600 Asia 1960 Czech Republic Prague 10511029 30400 Europe 1993 Denmark Copenhagen 5239356 16600 Europe 1945 Djibouti Djibouti 417089 8900 Africa 1977 Dominica Roseau 88871 300 Central America and Caribbean 1978 Dominican Republic Santo Domingo 7903469 18700 Central America and Caribbean 1945 Ecuador Quito 10782691 105000 South America 1945 Egypt Cairo 59912259 385200 Africa 1945 El Salvador San Salvador 5809949 8100 Central America and Caribbean 1945 England London 49293170 50400 Europe 1945 Equatorial Guinea Malabo 414059 10800 Africa 1968 Eritrea Asmera 3231677 45300 Africa 1993 Estonia Tallinn 1633006 17400 Europe 1991 Ethiopia Addis Ababa 59291170 437800 Africa 1945 Fiji Suva 771563 7100 Oceania 1970 Finland Helsinki 5119178 130600 Europe 1955 France Paris 58412558 210000 Europe 1945 French Guiana Cayenne 102000 43700 South America . Gabon Libreville 1150275 103300 Africa 1960 Gambia (The) Banjul 968493 4100 Africa 1965 Georgia, Republic of Tbilisi 5737236 26900 Asia 1992 Germany Berlin 81890690 137700 Europe 1973 Ghana Accra 17395511 92100 Africa 1957 Gibraltar Gibraltar 30297 100 Europe . Greece Athens 10669583 51000 Europe 1945 Grenada St. 
George's 94931 100 Central America and Caribbean 1974 Guatemala Guatemala City 10827127 42000 Central America and Caribbean 1945 Guinea Conakry 6455275 94900 Africa 1958 Guinea-Bissau Bissau 1108869 13900 Africa 1974 Guyana Georgetown 736216 83000 South America 1966 Haiti Port-au-Prince 6555255 10700 Central America and Caribbean 1945 Honduras Tegucigalpa 5367613 43300 Central America and Caribbean 1945 Hong Kong Victoria 5857414 400 Asia . Hungary Budapest 10421148 35900 Europe 1955 Iceland Reykjavik 266614 36700 1946 India New Delhi 9.2901E8 1222600 Asia 1945 Indonesia Jakarta 2.0239E8 741100 Asia 1950 Iran Tehran 66261493 632500 Asia 1945 Iraq Baghdad 20086891 168000 Asia 1945 Ireland Dublin 3574032 27100 Europe 1955 Isle of Man Douglas 70693 200 Europe . Israel Jerusalem 5101000 8000 Asia 1949 Italy Rome 58713508 116300 Europe 1955 Jamaica Kingston 2580291 4200 Central America and Caribbean 1962 Japan Tokyo 1.2635E8 145900 Asia 1956 Jordan Amman 4000210 34300 Asia 1955 Kalaallit Nunaat Nuuk 57564 840000 . Kazakhstan Almaty 17438936 1049200 Asia 1992 Kenya Nairobi 28520558 225000 Africa 1963 Kiribati Tarawa 78772 300 Oceania . Korea, North Pyongyang 23295340 47400 Asia 1991 Korea, South Seoul 45529277 38300 Asia 1991 Kuwait Kuwait City 1837006 6900 Asia 1963 Kyrgyzstan Bishkek 4744505 76600 Asia 1992 Laos Vientiane 4748545 91400 Asia 1955 Latvia Riga 2776212 24900 Europe 1991 Lebanon Beirut 3655834 3900 Asia 1945 Leeward Islands Plymouth 12119 100 Central America and Caribbean . 
Lesotho Maseru 1963244 11700 Africa 1966 Liberia Monrovia 3002430 38200 Africa 1945 Libya Tripoli 5107059 679400 Africa 1955 Liechtenstein Vaduz 30297 100 Europe 1990 Lithuania Vilnius 3886091 25200 Europe 1991 Luxembourg Luxembourg 405980 100 Europe 1945 Macedonia Skopje 2235917 9900 Europe 1993 Madagascar Antananarivo 13560924 226700 Africa 1960 Malawi Lilongwe 9828337 45700 Africa 1964 Malaysia Kuala Lumpur 19473883 127600 Asia 1957 Maldives Male 254495 100 Asia 1965 Mali Bamako 9203210 482100 Africa 1960 Malta Valletta 370633 100 Europe 1964 Marshall Islands Majuro 54535 100 Oceania 1991 Mauritania Nouakchott 2214709 398000 Africa 1961 Mauritius Port Louis 1128057 1000 Africa 1968 Mexico Mexico City 93114708 756100 North America 1945 Micronesia Palikir 121188 300 Oceania 1991 Moldova Chisinau 4517279 13000 Europe 1992 Monaco Monaco 31307 100 Europe 1993 Mongolia Ulaan Baatar 2454055 604800 Asia 1961 Montenegro Titograd 626137 5300 Europe . Morocco Rabat 28841705 177100 Africa 1956 Mozambique Maputo 17517708 313700 Africa 1975 Myanmar Yangon 44715298 261200 Asia 1948 Namibia Windhoek 1611798 318100 Africa 1990 Nauru Yaren 10099 100 Oceania . Nepal Kathmandu 21250295 56800 Asia 1955 Netherlands Amsterdam 15538306 16000 Europe 1945 Netherlands Antilles Willemstad 185822 400 Central America and Caribbean . New Zealand Wellington 3422548 104500 Oceania 1945 Nicaragua Managua 4137556 50900 Central America and Caribbean 1945 Niger Niamey 8720477 497000 Africa 1960 Nigeria Abuja 99062003 356700 Africa 1960 Northern Ireland Belfast 1585541 5500 Europe . 
Norway Oslo 4357714 125100 Europe 1945 Oman Muscat 1717838 118200 Asia 1971 Pakistan Islamabad 1.2306E8 339700 Asia 1947 Panama Panama City 2656034 29200 Central America and Caribbean 1945 Papua New Guinea Port Moresby 4238546 178700 Asia 1975 Paraguay Asuncion 5265614 157000 South America 1945 Peru Lima 23885121 496200 South America 1945 Philippines Manila 70500039 115900 Asia 1945 Poland Warsaw 39037645 120700 Europe 1945 Portugal Lisbon 10628177 35700 Europe 1955 Puerto Rico San Juan 3556864 3492 Central America and Caribbean . Qatar Doha 518078 4400 Asia 1971 Romania Bucharest 23410469 91700 Europe 1955 Russia Moscow 1.5109E8 6592800 Europe 1945 Rwanda Kigali 8456895 10200 Africa 1962 Saint Kitts and Nevis Basseterre 41406 100 Central America and Caribbean 1983 Saint Lucia Castries 146436 200 Central America and Caribbean 1979 Saint Vincent and the Grenadines Kingstown 116138 200 Central America and Caribbean 1980 San Marino San Marino 24238 100 Europe 1992 Sao Tome and Principe Sao Tome 138356 400 Africa 1975 Saudi Arabia Riyadh 18377132 865000 Asia 1945 Scotland Edinburgh 5006069 30400 Europe . Senegal Dakar 8817428 76000 Africa 1960 Serbia Belgrade 9755624 34100 Europe . Seychelles Victoria 72713 200 Africa 1976 Sierra Leone Freetown 4675832 27200 Africa 1961 Singapore Singapore 2887301 200 Asia 1965 Slovakia Bratislava 5457495 18900 Europe 1993 Slovenia Ljubljana 1991521 7800 Europe 1992 Solomon Islands Honiara 389821 11000 Oceania 1978 Somalia Mogadishu 6732996 246300 Africa 1960 South Africa Cape Town 44365873 473300 Africa 1945 Spain Madrid 39692061 194900 Europe 1955 Sri Lanka Colombo 18211509 25300 Asia 1955 Sudan Khartoum 29711229 966800 Africa 1956 Suriname Paramaribo 427188 63300 South America 1975 Swaziland Mbabane 945265 6700 Africa 1968 Sweden Stockholm 8864893 173700 Europe 1946 Switzerland Bern 7109689 15900 Europe . Syria Damascus 15034366 71500 Asia 1945 Taiwan Taipei 21509839 14000 Asia . 
Tajikistan Dushanbe 6054344 55300 Asia 1992 Tanzania Dar-es-Salaam 28263033 36400 Africa 1961 Thailand Bangkok 60099089 198100 Asia 1946 Togo Lome 4297120 21900 Africa 1960 Tonga Nuku'alofa 106040 300 Oceania . Trinidad and Tobago Port of Spain 1341146 2000 Central America and Caribbean 1962 Tunisia Tunis 8813388 63400 Africa 1956 Turkey Ankara 62769263 300948 Europe 1945 Turkmenistan Ashgabat 4034546 188400 Asia 1992 Turks and Caicos Islands Grand Turk 12119 200 Central America and Caribbean . Tuvalu Funafuti 10099 100 Oceania . Uganda Kampala 20055584 93100 Africa 1962 Ukraine Kiev 52360233 233100 Europe 1945 United Arab Emirates Abu Dhabi 2818628 30000 Asia 1971 United States Washington 2.6329E8 3787318 North America 1945 Uruguay Montevideo 3230667 68000 South America 1945 Uzbekistan Tashkent 22832806 172700 Asia 1992 Vanuatu Vila 171683 4700 Oceania 1981 Vatican City Vatican City 1010 2 Europe . Venezuela Caracas 20765543 352100 South America 1945 Vietnam Hanoi 73827657 127200 Asia 1977 Wales Cardiff 2825697 8000 Europe . 
Western Samoa                       Apia                206020    1100      Oceania                         1976
Yemen                               Sanaa               11214929  205300    Asia                            1947
Yugoslavia                          Belgrade            10866513  39400     Europe                          1945
Zambia                              Lusaka              9278952   290600    Africa                          1964
Zimbabwe                            Harare              11083641  150900    Africa                          1980
;
run;

/* NOTE(review): the comment statement below holds a lone quote; presumably
   it balances the apostrophes in the data records for editors - confirm. */
*';

/* LEFT OUTER JOIN example */
proc sql;
  title 'Coordinates of Capital Cities';
  select Capital format=$20.,
         Name 'Country' format=$20.,
         Latitude,
         Longitude
    from countries a
         left join worldcitycoords b
           on a.Capital = b.City and a.Name = b.Country
    order by Capital;
quit;

/* RIGHT OUTER JOIN example */
proc sql;
  title 'Populations of Capitals Only';
  select City format=$20.,
         Country 'Country' format=$20.,
         Population
    from countries
         right join worldcitycoords
           on Capital = City and Name = Country
    order by City;
quit;

/* FULL OUTER JOIN example */
proc sql;
  title 'Populations/Coordinates of World Cities';
  select City '#City#(WORLDCITYCOORDS)' format=$20.,
         Capital '#Capital#(COUNTRIES)' format=$20.,
         Population,
         Latitude,
         Longitude
    from countries
         full join worldcitycoords
           on Capital = City and Name = Country;
quit;
title;

/*************************************************
**                                              **
**                  Chapter 3                   **
**                                              **
*************************************************/

proc print data=c9501;
  by sex;
run;

/* Write N, mean and variance of MATH to data set RESULT */
proc means data=samp.c9501;
  var math;
  output out=result n=n mean=meanmath var=varmath;
run;

proc print data=result;
run;

/******************************************/
/* PROC PRINT examples                    */
/******************************************/
proc print data=samp.c9501 label;
  id name;
  var math
chinese;
  label name='姓名' math='数学 成绩' chinese='语文 成绩';
run;

/* SPLIT= makes '*' a line-break character inside labels */
proc print data=samp.c9501 split="*";
  id name;
  var math chinese;
  label name='姓名' math='数学*成绩' chinese='语文*成绩';
run;

proc print data=samp.c9501;
  format math 5.1 chinese 5.1;
run;

proc print data=samp.gpa;
run;

proc print data=samp.c9501;
  var name chinese sex;
run;

proc print data=samp.c9501 noobs;
run;

proc print data=samp.c9501;
  where name in ('李明', '张聪');
run;

/* Sort into a WORK copy so BY-group printing works */
proc sort data=samp.c9501 out=c9501;
  by sex;
run;

proc print data=c9501;
  by sex;
run;

proc means data=c9501;
  var math chinese;
run;

proc print data=c9501 noobs label;
  var name sex math chinese avg;
  label name='姓名' sex='性别' math='数学' chinese='语文' avg='平均分';
run;

/* ODS HTML: write the output to an HTML body file, then reopen the
   default HTML destination */
ods html body="testhtml.htm";
title '身高和体重数据';
proc print data=samp.class noobs;
run;
title '身高和体重基本统计';
proc means data=samp.class;
  var height weight;
  class sex;
run;
title;
ods html close;
ods html;

/* ODS CSVALL: write the same output to a CSV file */
ods csvall body="testcsv.csv";
title '身高和体重数据';
proc print data=samp.class noobs;
run;
title '身高和体重基本统计';
proc means data=samp.class;
  var height weight;
  class sex;
run;
title;
ods csvall close;

/* ODS RTF: plain version */
ods rtf file="testods.rtf";
title '身高和体重数据';
proc print data=samp.class noobs;
run;
title '身高和体重基本统计';
proc means data=samp.class;
  var height weight;
  class sex;
run;
ods rtf close;

/* ODS RTF: same file name, with extra destination options and the
   Minimal style; procedure titles are switched off */
ods rtf file="testods.rtf" contents toc_data bodytitle startpage=no keepn;
ods rtf style=Minimal;
ods noproctitle;
title '身高和体重数据';
proc print data=samp.class noobs;
run;
title '身高和体重基本统计';
proc means data=samp.class;
  var height weight;
  class sex;
run;
ods rtf close;

/* ODS LATEX */
ods latex file='sasout.tex' stylesheet='sas.sty'(url='sas');
proc means data=samp.class;
  title 'Mean Height and Weight';
  var height weight;
run;
ods latex close;

/* ODS PDF */
ods pdf file='sasout.pdf';
title;
proc print data=samp.class noobs label;
run;
ods pdf close;

/* Saving procedure output tables as data sets with ODS:
   ODS TRACE writes the names of the output objects to the log */
ods trace on;
proc univariate data=samp.class;
  var height;
run;
ods trace off;

ods output Moments=mom
Quantiles=qu;
proc univariate data=samp.class;
  var height;
run;
ods output close;

proc print data=c9501 noobs;
  var name sex math chinese avg;
  format math 7.1 chinese 7.1 avg 7.2;
run;

title '95级1班成绩表';
proc print data=c9501 noobs label;
  var name sex math chinese avg;
  label name='姓名' sex='性别' math='数学' chinese='语文' avg='平均分';
run;

proc means data=samp.gpa;
run;

title;
footnote '第三章例子输出';
options nonumber nodate linesize=64 pagesize=60;

proc sort data=c9501;
  by sex;
run;
proc print data=c9501;
  by sex;
run;

/* Amount paid per student (list input) */
data bkmoney;
  input name $ amount;
  cards;
李明 20
张红艺 15
王思明 10
张聪 20
刘颍 50
;
run;

proc print data=bkmoney noobs;
  sum amount;
run;

/* Match-merge scores with amounts by NAME; both inputs must be sorted */
proc sort data=c9501;
  by name;
run;
proc sort data=bkmoney;
  by name;
run;
data c9501bk;
  merge c9501 bkmoney;
  by name;
run;

proc sort data=c9501bk;
  by sex;
run;
proc print data=c9501bk;
  by sex;
  sum amount;
run;

/* Naming the same variable in BY and ID gives a compact listing */
proc print data=c9501bk;
  by sex;
  id sex;
run;

/* Compact grouped listing with PROC REPORT.
   OTHERWISE is required: without it SELECT stops the step with an error
   for any SEX value other than 'F' or 'M'. The message matches the one
   used by the c9501m/c9501f split elsewhere in this file. */
data new;
  set samp.class;
  select(sex);
    when('F') sexc='女';
    when('M') sexc='男';
    otherwise put sex= '有错';
  end;
run;

proc report data=new nowd;
  column sexc age name height weight;
  define sexc / order '性别';
  define age / order '年龄';
run;

proc format;
  value $sfmt 'F'='女' 'M'='男';
run;

/* Reads samp.class, the data set every other step in this section uses,
   instead of an unqualified CLASS that is not created here. */
proc report data=samp.class nowd;
  column sex age name height weight;
  define sex / group '性别';
  define age / group '年龄';
  format sex $sfmt.;
run;

/* PROC PRINT example from the online documentation.
   NOTE(review): EXPREV is not created in this part of the file. */
proc print data=exprev double;
  var month state revenues;
  where region='Southern' and revenues>=8000;
  title1 'High Monthly Revenues for';
  title2 'the Southern Region';
run;

/* ODS LaTeX example */
ods latex file="simple.tex" stylesheet="sas.sty"(url="sas");
proc print data=samp.class;
run;
ods latex close;

options nodate pageno=1 linesize=64 pagesize=60;

/* NOTE(review): PILOTS is not created in this part of the file. */
proc sort data=pilots out=tempemp;
  by jobcode gender;
run;

proc print data=tempemp split='*';
  id jobcode;
  by jobcode;
  var gender salary;
  sum salary;
  label jobcode='Job Code*========'
        gender='Gender*======'
        salary='Annual Salary*=============';
  format salary dollar11.2;
  where jobcode in ('PT1','PT2');
title 'Expenses Incurred for';
  title2 'Salaries for Pilots';
run;
title;

/******************************************/
/* PROC TABULATE examples                 */
/******************************************/

/* Basic PROC TABULATE examples */
proc tabulate data=samp.c9501bk;
  class sex;
  var amount;
  table sex, amount;
run;

proc tabulate data=samp.c9501bk;
  class sex;
  table sex;
run;

/* Statistics nested under each analysis variable */
proc tabulate data=samp.c9501bk;
  class sex;
  var math chinese;
  table sex, (math chinese)*(mean std);
run;

/* Analysis variables nested under each statistic */
proc tabulate data=samp.c9501bk;
  class sex;
  var math chinese;
  table sex, (mean std)*(math chinese);
run;

/* ALL adds a summary row */
proc tabulate data=samp.c9501bk;
  class sex;
  var math chinese;
  table sex all, (math chinese)*(mean std);
run;

proc tabulate data=samp.drug;
  class drug disease;
  var chang_bp;
  table drug*disease, chang_bp*(mean std);
run;

proc tabulate data=samp.drug;
  class drug disease;
  var chang_bp;
  table drug, disease*chang_bp*(mean std);
run;

proc tabulate data=samp.c9501bk;
  class sex;
  table (sex all)*(n pctn);
run;

proc tabulate data=samp.c9501bk;
  class sex;
  var math chinese;
  table (sex all)*math*(mean std);
run;

/* Labels given inline in the TABLE statement */
proc tabulate data=samp.c9501bk;
  class sex;
  var math chinese;
  table (sex='性别' 'all'='总计'),
        (math='数学' chinese='语文')*('mean'='平均值' 'std'='标准差');
run;

/* Labels given with KEYLABEL and LABEL statements */
proc tabulate data=samp.c9501bk;
  class sex;
  var math chinese;
  table (sex all), (math chinese)*(mean std);
  keylabel mean='平均值' std='标准差' all='总计';
  label sex='性别' math='数学' chinese='语文';
run;

/* Per-statistic formats with *F= */
proc tabulate data=samp.c9501bk;
  class sex;
  var math chinese;
  table sex='性别' 'all'='总计',
        (math='数学' chinese='语文')*
        ('mean'='平均值'*f=6.1 'std'='标准差'*f=7.2);
run;

/* OUT= saves the table statistics to data set SUMMD */
proc tabulate data=samp.c9501bk out=summd;
  class sex;
  var amount;
  table sex, amount*(n sum pctsum);
run;

/* STYLE options, for use together with ODS */
proc tabulate data=samp.c9501bk;
  class sex;
  var math chinese / style=[font_size=200%];
  table sex=' ' 'all'='总计'*[style=[font_weight=bold background=white]] ,
        (math='数学' chinese='语文')*
        ('mean'='平均值'*f=6.1 'std'='标准差'*f=7.2);
run;

/* PROC TABULATE example from the online documentation.
   NOTE(review): WGHTCLUB is not created in this part of the file. */
proc print data=wghtclub;
  title 'Health Club Data';
run;

proc tabulate data=wghtclub;
  class team;
  var strtwght endwght loss;
  table team, mean*(strtwght endwght loss);
  table mean*(strtwght endwght loss), team;
  title 'Mean Starting Weight, Ending Weight, and Weight Loss';
run;
quit;

/* specify HTML output */
ods html file='table.htm' style=default;

/* PROC TABULATE with style specifications.
   The stray "length Group $ 8;" statement was removed: LENGTH is a
   DATA step statement and is not valid inside PROC TABULATE.
   NOTE(review): PATIENTS is not created in this part of the file. */
proc tabulate data=patients style=[font_weight=bold];
  class actlevel;
  classlev actlevel / style=[just=left];
  var age height weight / style=[font_size=3];
  keyword all sum / style=[font_width=wide];
  keylabel all="All Patients";
  table (actlevel="Activity Level" all*[style=[background=yellow]]),
        (age height weight*f=best10.2)*mean /
        style=[background=white]
        misstext=[label="Missing" style=[font_weight=light]]
        box=[label="Patient Info by Activity Level"
             style=[font_style=italic]];
  title 'Enhanced Table';
run;
ods html close;
title;

/******************************************/
/* PROC SORT examples                     */
/******************************************/

/* Sorting example (sorts samp.c9501 in place) */
proc sort data=samp.c9501;
  by sex;
run;

/* Sorting while removing duplicate key values */
proc sort data=samp.c9501 out=c9501sex(keep=sex) nodupkey;
  by sex;
run;
proc print;
run;

proc sort data=samp.c9501;
  by sex descending avg;
run;

/* Sorting combined with a data step: after sorting by descending height
   within sex, the first row of each BY group is the tallest */
proc sort data=samp.class out=cl2;
  by sex descending height;
run;
data new;
  set cl2;
  if first.sex;
  by sex;
run;
proc print;
run;

/* The same problem solved with SQL in several statements (the single
   statement version follows). RUN has no effect inside PROC SQL, so the
   statements are simply listed one after another. */
proc sql;
  select sex, max(height) AS height
    from samp.class
    group by sex;
  create table tmpd AS
    select sex, max(height) AS height
      from samp.class
      group by sex;
  select a.name, a.sex, a.height
    from samp.class AS a, tmpd AS b
    where a.sex=b.sex and a.height=b.height;
  drop table tmpd;
quit;

/* The program above simplified to a subquery (inline view) */
proc sql;
  select a.name, a.sex, a.height
    from samp.class AS a,
         ( select sex, max(height) AS height
             from samp.class
             group by sex ) AS b
    where a.sex=b.sex and a.height=b.height;
quit;

/******************************************/
/* PROC TRANSPOSE examples                */
/******************************************/

/* Simple matrix transpose */
data mat;
input x1 x2 x3;
  cards;
1 2 3
4 5 6
7 8 9
10 11 12
;
run;

proc transpose data=mat
               out=matt(rename=(col1=I1 col2=I2 col3=I3 col4=I4));
  var x1 x2 x3;
run;
proc print;
run;

/* Combining rows: long table to wide table, with TRANSPOSE */
data onecol;
  input num test $ val;
  cards;
1 a 11
2 b 22
3 a 13
1 b 21
2 a 12
3 b 23
;
run;

title '合并行: PROC TRANSPOSE';
proc sort data=onecol;
  by num;
run;
proc print;
run;

/* Wrong result! (kept on purpose as a counter-example: rows are not
   ordered by TEST within NUM) */
proc transpose data=onecol out=twotest;
  var val;
  by num;
run;
proc print;
run;

/* Correct result, but not safe: relies on every NUM having every TEST */
proc sort data=onecol;
  by num test;
run;
proc transpose data=onecol out=twotest;
  var val;
  by num;
run;
proc print;
run;

/* Same layout with one row absent (num=2, test='a') */
data onecolb;
  input num test $ val;
  cards;
1 a 11
2 b 22
3 a 13
1 b 21
3 b 23
;
run;
proc sort data=onecolb;
  by num test;
run;
proc transpose data=onecolb out=twotest;
  var val;
  by num;
run;
proc print;
run;

/* Correct solution: ID names the output columns from TEST */
proc transpose data=onecol out=twotest;
  var val;
  id test;
  by num;
run;
proc print;
run;
proc transpose data=onecolb out=twotestb;
  var val;
  id test;
  by num;
run;
proc print;
run;

/* Long table to wide table with data steps */
title '合并行: 数据集拆分与横向合并';
data a;
  set onecol;
  where test='a';
  a=val;
  keep num a;
run;
data b;
  set onecol;
  where test='b';
  b=val;
  keep num b;
run;

/* or, in one step using output data set options */
data a(where=(test='a') rename=(val=a))
     b(where=(test='b') rename=(val=b));
  set onecol;
run;
proc print data=a;
run;
proc print data=b;
run;

proc sort data=a;
  by num;
run;
proc sort data=b;
  by num;
run;
data new;
  merge a b;
  by num;
run;
proc print;
run;

/* Long table to wide table with PROC SQL.
   Reads the WORK copy of ONECOL created above, so the example is
   self-contained. */
title '合并行: PROC SQL';
proc sql;
  select a.num as num, a.val as a, b.val as b
    from onecol a, onecol b
    where a.num=b.num and a.test='a' and b.test='b';
quit;

/* Splitting one row into several: wide table to long table, with
   TRANSPOSE. OUT=twocol creates the WORK data set that PROC TRANSPOSE
   reads; without it the sort happens in place in SAMP and WORK.twocol
   is never created. */
title '拆分行: PROC TRANSPOSE';
proc sort data=samp.twocol out=twocol;
  by num;
run;
proc transpose data=twocol out=onetest;
  var test1 test2;
  by num;
run;
proc print;
run;

**************************************************;
title '拆分行: PROC TRANSPOSE';
proc sort data=samp.twocol out=twocol;
  by num;
run;

/* A data set option names the transposed variable: */
proc transpose data=twocol out=onetest(rename=(col1=val));
  var
test1 test2;
  by num;
run;
proc print;
run;

/* To turn the test names test1, test2 into 1, 2 a string function is
   used; the LENGTH statement fixes the left-to-right order of the
   variables in the output data set. */
data new(drop=_name_);
  length num 8 testid 8 val 8;
  set onetest;
  testid = input(substr(_name_, 5,1), 1.);
run;
proc print;
run;

/* Splitting rows with a data step: wide table to long table */
title '拆分行: 数据步重复OUTPUT';
data new1;
  attrib num  length=8
         test length=$1
         val  length=8;
  set samp.twocol;
  val=test1;
  test='a';
  output;
  val=test2;
  test='b';
  output;
  drop test1 test2;
run;
proc print;
run;

/* Splitting rows with PROC SQL */
title '拆分行: PROC SQL';
proc sql;
  select num, 'a' as test, test1 as val
    from samp.twocol a
  union
  select num, 'b' as test, test2 as val
    from samp.twocol;
quit;

/* Splitting rows with arrays and a loop. The slash in the INPUT statement
   moves to the next input line. FIRSTOBS=2 skips the heading line.
   The row label occupies columns 1-12; values start after that. */
data rats;
  infile cards firstobs=2;
  attrib rem1-rem20 format= $1.;
  input w1 $ 1-12 dose1-dose20
      / w2 $ 1-12 rem1-rem20 $;
  array x(20) dose1-dose20;
  array y(20) $ rem1-rem20;
  do i = 1 to 20;
    dose = x(i);
    remiss = y(i);
    output;
  end;
  keep i dose remiss;
  cards;
rat number   1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
trtmt dose   25 30 35 40 45 50 55 60 65 70 25 30 35 40 45 50 55 60 65 70
remission    F F F F T T F T T T F F F F F T T T T T
;
proc print data = rats;
run;

/******************************************/
/* PROC MEANS examples                    */
/******************************************/

/* PROC MEANS */
proc means data=c9501;
  var math chinese;
run;

proc means data=samp.class sum cv;
  var height;
run;

/* MEANS example: confidence limits for the mean (ALPHA=0.10) */
proc means data=samp.class alpha=0.10 mean std clm;
  var height;
run;

/* OUTPUT with explicitly named means */
proc means data=samp.class;
  var height weight;
  output out=mc1 mean=mh mw;
run;

/* OUTPUT with AUTONAME-generated variable names */
proc means data=samp.class;
  var height weight;
  output out=mc2 mean= std= / autoname;
run;

proc means data=samp.class noprint mean min max;
  var height weight;
  output out=mc3;
run;
proc print;
run;

/* MEANS example: height minus the class mean height.
   One data step uses two SET statements; the second is executed only on
   the first iteration and MH is kept for every later row. */
proc means data=samp.class;
  var height;
  output out=mc1 mean=mh;
run;
data cc;
  set samp.class;
  if _n_=1 then set mc1(keep=mh);
  height2 = height - mh;
run;
proc print;
run;
proc datasets nolist;
  delete cc;
quit;

/* Means例子:
grouping with the CLASS statement */
proc means data=samp.class;
  class sex;
  var height;
  output out=mcs mean=mh;
run;
proc print;
run;

/* MEANS example: per sex, subtract the group mean height from each height
   to get HEIGHT2. NWAY keeps only the rows for the full CLASS combination. */
proc means data=samp.class nway;
  class sex;
  var height;
  output out=mcs mean=mh;
run;
proc sort data=samp.class;
  by sex;
run;
proc sort data=mcs;
  by sex;
run;
data new;
  merge samp.class mcs(drop=_type_ _freq_);
  by sex;
  height2 = height - mh;
run;
proc print;
run;

/* MEANS example: grouping with the CLASS statement */
proc means data=samp.grade maxdec=3;
  var Score;
  class Status Year;
  title '按学生状态和毕业年分类的分数统计';
run;
title;

/* MEANS example: grouped summary written to a data set */
proc means data=samp.grade noprint;
  var Score;
  class Status Year;
  output out=mgsy mean=ms std=ss;
run;
proc print;
run;

/* MEANS example: naming the statistics to display.
   The title is cleared afterwards so it does not carry over to the
   unrelated output that follows. */
proc means data=samp.cake n mean max min range std fw=8;
  var PresentScore TasteScore;
  title 'Summary of Presentation and Taste Scores';
run;
title;

/******************************************/
/* PROC UNIVARIATE examples               */
/******************************************/

/* PROC UNIVARIATE */
proc univariate data=samp.gpa;
  var gpa;
run;

proc univariate data=samp.gpa plot;
  var gpa;
run;

/******************************************/
/* PROC FREQ examples                     */
/******************************************/

/* PROC FREQ */
proc freq data=c9501;
  tables sex;
run;

/* PROC FREQ: output to a data set */
proc freq data=c9501;
  tables sex / out=sext;
run;
proc print;
run;

/* Pairs of AMOUNT and NUM (used below as a frequency); @@ reads several
   observations from one input line */
data samp.taxif;
  input amount num @@;
  cards;
10 4 12 6 13 1 15 1 16 1 19 5 20 3 23 1 24 1 25 1
26 3 27 1 32 1 47 1 48 2 49 1 52 1 55 1 58 1 81 1
;
run;

/* Reads samp.taxif, the data set created just above */
proc means data=samp.taxif n mean sum;
  var amount;
  freq num;
run;

/* PROC FREQ: eye and hair colour, simple frequency tables */
proc freq data=samp.color;
  tables eyes hair;
  weight count;
  title1 'Simple Frequency Tables';
run;

/* PROC FREQ: eye by hair colour crosstabulation, saved to a data set */
proc freq data=samp.color;
  weight count;
  tables eyes*hair/out=freqcnt outexpect sparse;
  title1 'Crosstabulation Table';
run;
proc print data=freqcnt noobs;
  title2 'Output Data Set from PROC FREQ';
run;
title;

/* Example: building a data set of crosstabulation counts with PROC FREQ */
data test;
  do startid=1 to 3;
    do sastime=1 to 4;
bid=rantbl(1133, 0.5, 0.3, 0.2);
            /* write BID rows (1, 2 or 3) for this startid/sastime cell */
            do pid=1 to bid;
                output;
            end;
        end;
    end;
    keep startid sastime pid;
run;
proc print; run;
proc freq data=test;
    tables startid*sastime / out=counts;
run;
proc print; run;

/******************************************/
/* PROC CORR examples                     */
/******************************************/
/* PROC CORR: correlations among the VAR variables */
proc corr data=samp.gpa;
    var hsm hss hse;
run;
/* VAR crossed with WITH variables */
proc corr data=samp.gpa;
    var hsm hss hse;
    with satm satv;
run;
/* Spearman rank correlation */
proc corr data=samp.gpa spearman;
    var satm satv;
    with gpa;
run;

/******************************************/
/* PROC GPLOT examples                    */
/******************************************/
/* Plot: scatter plot */
proc gplot data=samp.gpa;
    symbol i=none v=star;
    plot satv*satm;
run;

/* Plot: line plot */
proc gplot data=samp.air;
    symbol i=join v=star;
    plot co*datetime;
run;

/* Plot: points joined by lines, with explicit colors.
   GOPTIONS is a global statement that sets graphics options;
   reset=global resets the settings of the graphics global statements
   (symbol, pattern, axis, legend and so on);
   reset=all additionally resets what GOPTIONS itself has set.
   PLOT statement options:
     haxis=  tick values of the horizontal axis,
     vaxis=  tick values of the vertical axis,
     hminor= number of minor ticks on the horizontal axis
             (number of subdivisions minus one),
     vminor= number of minor ticks on the vertical axis,
     vref=   positions of reference lines on the vertical axis,
     lvref=  line type of those reference lines,
     cvref=  color of those reference lines,
     caxis=  axis color,
     ctext=  text color.
   In the SYMBOL statement, height sets the size of the plot symbol. */
goptions reset=global gunit=pct border cback=white
         colors=(black blue green red)
         ftext=swiss ftitle=swissb htitle=6 htext=4;
proc gplot data=samp.stocks;
    plot dowjoneshigh*year /
         haxis=1955 to 1995 by 5
         vaxis=0 to 6000 by 1000
         hminor=4 vminor=1
         vref=1000 3000 5000 lvref=2 cvref=blue
         caxis=blue ctext=red;
    symbol1 color=red interpol=join value=dot height=1;
    title1 'Dow Jones Yearly Highs';
    footnote h=3 j=l ' Source: 1997 World Almanac' j=r 'GPLDTPT1 ';
run;
quit;
title; footnote;

/* Plot: several curves on one graph */
proc gplot data=samp.air;
    symbol1 color=black i=join v=none line=1;
    symbol2 color=blue i=join v=none line=2;
    plot co*datetime=1 so2*datetime=2 / overlay;
run;

/* GPLOT example: stock data, multiple curves */
goptions reset=global gunit=pct border cback=white
         colors=(black blue green red)
         ftext=swiss ftitle=swissb htitle=6 htext=4;
title1 'Dow Jones Yearly Highs and Lows';
footnote1 h=3 j=l ' Source: 1997 World
Almanac' j=r 'GPLOVRL1 ';
symbol1 color=red interpol=join value=dot height=3;
symbol2 font=marker value=C color=blue interpol=join height=2;
axis1 order=(1955 to 1995 by 5) offset=(2,2) label=none
      major=(height=2) minor=(height=1) width=3;
axis2 order=(0 to 6000 by 1000) offset=(0,0) label=none
      major=(height=2) minor=(height=1) width=3;
legend1 label=none shape=symbol(4,2)
        position=(top center inside) mode=share;
/* Overlay the yearly high and low curves on one set of axes */
proc gplot data=samp.stocks;
    plot DowJonesHigh*year DowJonesLow*year /
         overlay legend=legend1
         vref=1000 to 5000 by 1000 lvref=2
         haxis=axis1 hminor=4
         vaxis=axis2 vminor=1;
run;
quit;
title; footnote;

/* GPLOT example: multiple curves of average monthly temperature,
   one curve per value of CITY (plot y*x=third-variable) */
goptions reset=global gunit=pct border cback=white
         colors=(black red blue green)
         ftext=swiss ftitle=swissb htitle=6 htext=3;
title1 'Average Monthly Temperature';
footnote1 j=l ' Source: 1984 American Express';
footnote2 j=l ' Appointment Book' j=r 'GPLVRBL2(a) ';
symbol1 interpol=join value=dot height=3;
proc gplot data=samp.citytemp;
    plot faren*month=city / hminor=0;
run;
    /* second graph in the same procedure: spline curves, custom axes, legend */
    footnote2 j=l ' Appointment Book' j=r 'GPLVRBL2(b) ';
    symbol1 color=green interpol=spline width=2 value=triangle height=3;
    symbol2 color=blue interpol=spline width=2 value=circle height=3;
    symbol3 color=red interpol=spline width=2 value=square height=3;
    axis1 label=none
          value=('JAN' 'FEB' 'MAR' 'APR' 'MAY' 'JUN'
                 'JUL' 'AUG' 'SEP' 'OCT' 'NOV' 'DEC')
          order = (1 to 12 by 1) offset=(2) width=3;
    axis2 label=('Degrees' justify=right 'Fahrenheit')
          order=(0 to 100 by 10) width=3;
    legend1 label=none value=(tick=1 'Minneapolis');
    plot faren*month=city /
         haxis=axis1 hminor=0
         vaxis=axis2 vminor=1
         caxis=red legend=legend1;
run;
quit;
title; footnote;

/* GPLOT example: two vertical axes, needle plot */
goptions reset=global gunit=pct border cback=white
         colors=(black blue green red)
         ftext=swiss ftitle=swissb htitle=6 htext=3;
data samp.minntemp;
    /* month is read starting at column 10 and f2 starting at column 23 */
    input @10 month
          @23 f2;          /* fahrenheit temperature for Minneapolis */
    /* calculate centigrade temperature for Minneapolis */
    c2=(f2-32)/1.8;
    output;
    datalines;
01JAN83    1   1  40.5  12.2  52.1
01FEB83    2   1  42.2  16.5  55.1
01MAR83    3   2  49.2  28.3  59.7
01APR83    4   2  59.5  45.1  67.7
01MAY83    5   2  67.4  57.1  76.3
01JUN83    6   3  74.4  66.9  84.6
01JUL83    7   3  77.5  71.9  91.2
01AUG83    8   3  76.5  70.2  89.1
01SEP83    9   4  70.6  60.0  83.8
01OCT83   10   4  60.2  50.0  72.2
01NOV83   11   4  50.0  32.4  59.8
01DEC83   12   1  41.2  18.6  52.5
;
run;
/* The records above are laid out in fixed columns because the INPUT statement
   of this step uses column pointers (@10 for month, @23 for f2): the month
   ends in column 12 and the fifth field starts in column 25, so @10 finds the
   month and @23 finds the fifth field. With single-space separated records
   the pointers would land on the wrong fields.
   NOTE(review): the column layout was reconstructed; confirm that the fifth
   field is the intended Minneapolis temperature. */

title1 'Average Monthly Temp for Minneapolis';
footnote1 j=l ' Source: 1984 American Express';
footnote2 j=l ' Appointment Book' j=r 'GPLSCVL1 ';
symbol1 interpol=needle ci=blue cv=red width=3 value=star height=3;
symbol2 interpol=none value=none;
axis1 label=none
      value=(h=3 'JAN' 'FEB' 'MAR' 'APR' 'MAY' 'JUN'
                 'JUL' 'AUG' 'SEP' 'OCT' 'NOV' 'DEC')
      order=(1 to 12 by 1) offset=(2) width=3;
axis2 label=(h=3 'Degrees' justify=right ' Centigrade')
      order=(-20 to 30 by 10) width=3 value=(h=3);
axis3 label=(h=3 'Degrees' justify=left 'Fahrenheit')
      order=(-4 to 86 by 18) width=3 value=(h=3);
/* PLOT draws against the left axis (centigrade), PLOT2 adds the right axis
   (fahrenheit); -4..86 F by 18 matches -20..30 C by 10. */
proc gplot data=samp.minntemp;
    plot c2*month / frame caxis=red
                    haxis=axis1 hminor=0
                    vaxis=axis2 vminor=1;
    plot2 f2*month / caxis=red
                     vaxis=axis3 vminor=1;
run;
quit;
title; footnote;

/* GPLOT example: stock data, filling between the lines with the AREAS option */
goptions reset=global gunit=pct border cback=white
         colors=(blue red) ctext=black
         ftext=swiss ftitle=swissb htitle=6 htext=4;
title1 'Dow Jones Yearly Highs and Lows';
footnote1 h=3 j=l ' Source: 1997 World Almanac' j=r 'GPLFILL1 ';
symbol1 interpol=join;
axis1 order=(1955 to 1995 by 5) offset=(2,2) label=none
      major=(height=2) minor=(height=1);
axis2 order=(0 to 6000 by 1000) offset=(0,0) label=none
      major=(height=2) minor=(height=1);
proc gplot data=samp.stocks;
    plot DowJonesLow*year DowJonesHigh*year /
         overlay
         haxis=axis1 hminor=4
         vaxis=axis2 vminor=1
         caxis=black areas=2;
run;
quit;
/* Clear the title and footnote so the following unrelated plots do not
   inherit the Dow Jones headings (same reset the other examples use). */
title; footnote;

/* Plot: scatter plot with a regression line (i=rlcli95: linear regression
   with 95% confidence limits for individual predictions) */
proc gplot data=samp.gpa;
    symbol i=rlcli95 v=star;
    plot satv*satm;
run;

/* Plot: regression and smoothing on simulated data, y = 10 + 2x + noise */
data curves;
    n = 30;
    do i=1 to n;
        x = normal(0)*10;
        y = 10 + 2*x + normal(0);
        output;
    end;
    keep x y;
run;
proc sort data=curves;
    by x;
proc gplot data=curves;
symbol v=star i=spline;
    plot y*x;
    title 'Spline Smooth';
run;
proc gplot data=curves;
    symbol v=star i=sm70;
    plot y*x;
    title 'i=sm70 Smooth';
run;
proc gplot data=curves;
    symbol v=star i=rlcli95;
    plot y*x;
    title 'i=rlcli95 Smooth';
run;
title;

/* Plot: spline smoothing of a sparsely sampled sine curve.
   Y holds the sparse samples (every 70 degrees), Y2 the dense reference
   curve (every degree); each row fills only one of the two. */
data sine;
    do x=0 to 360 by 70;
        y = sin(x/180*3.1415926);
        y2 = .;
        output;
    end;
    do x=0 to 360;
        y = .;
        y2 = sin(x/180*3.1415926);
        output;
    end;
run;
proc gplot data=sine;
    symbol i=spline v=star color=black;
    symbol2 i=join v=none color=blue;
    plot y*x=1 y2*x=2 / overlay;
run;

/* GPLOT example: scatter plot and regression line */
goptions reset=global gunit=pct border cback=white
         colors=(black blue green red)
         ftext=swiss ftitle=swissb htitle=6 htext=4;
title 'Study of Height vs Weight';
footnote1 h=3 j=l ' Source: T. Lewis & L. R. Taylor';
footnote2 h=3 j=l ' Introduction to Experimental Ecology' j=r 'GPLVRBL1(a) ';
proc gplot data= samp.stats;
    plot height*weight;
run;
    footnote1;
    footnote2 h=3 j=r 'GPLVRBL1(b) ';
    symbol1 interpol=rcclm95   /* regression analysis with  */
                               /* confidence limits         */
            value=diamond      /* plot symbol               */
            height=3           /* plot symbol height        */
            cv=red             /* plot symbol color         */
            ci=blue            /* regression line color     */
            co=green           /* confidence limits color   */
            width=2;           /* line width                */
    plot height*weight / haxis=45 to 155 by 10
                         vaxis=48 to 78 by 6
                         hminor=1
                         regeqn;
run;
quit;
title; footnote;

/* PROC PLOT: low-resolution character scatter plot */
proc plot data=samp.stocks;
    plot dowjoneshigh*year='*';
    title 'High Dow Jones Values';
    title2 'from 1954 to 1998';
run;
quit;

/* PROC PLOT: low-resolution character scatter plot, two curves */
proc plot data=samp.stocks;
    plot dowjoneshigh*year='*'
         dowjoneslow*year='o' / overlay box;
    title 'Plot of Highs and Lows';
    title2 'for the Dow Jones Industrial Average';
run;
quit;

/* GPLOT example: bubble plot, a scatter-plot variant in which the bubble size
   shows a third variable. The data are average salaries by kind of engineer.
   eng: kind of engineer. dollars: salary. num: number of people.
   Plain list input reads one observation per record, so every observation
   is placed on its own line after DATALINES. */
data samp.jobs;
    length eng $5;
    input eng dollars num;
    datalines;
Civil 27308 73273
Aero 29844 70192
Elec 22920 89382
Mech 32816 19601
Chem 28116 25541
Petro 18444 34833
;
run;

/*
goptions中选项reset=global可以重置所有绘图选项, 如颜色、绘图符号、连线类型、填充模式等; gunit指定绘图选项缺省的数值单位, border要求绘制坐标区域边界, cback为背景色, colors为各种符号、连线的颜色取用优先表, ftitle指定图形标题的字体, ftext指定图形标注的字体, htitle指定标题高度,这里单位是百分比所以标题高度为绘图高度的6%, htext指定图形标注字体高度。 */ goptions reset=global gunit=pct border cback=white colors=(black blue greeen red) ftitle=swissb ftext=swiss htitle=6 htext=4; title1 'Member Profile'; title2 'Salaries and Number of Member Engineers'; footnote h=3 j=r 'GPLBUBL1 '; axis1 offset=(5,5); /* 坐标轴原点位置,单位是百分比 在下面的bubble语句中用了haxis=指定用axis1的规定画横轴。 注意此图的横轴是一个分类变量。 */ proc gplot data=samp.jobs; format dollars dollar9.; bubble dollars*eng=num / haxis=axis1; run; quit; /* GPLOT例子: 带标注的气泡图 */ goptions reset=global gunit=pct border cback=white colors=(black blue green red) ftitle=swissb ftext=swiss htitle=6 htext=3; title1 'Member Profile'; title2 h=4 'Salaries and Number of Member Engineers'; footnote h=3 j=r 'GPLBUBL2'; /* axis1和axis2: 在下面的bubble语句中用了haxis=指定用axis1的规定画横轴, 用vaxis=指定了用axis2的规定画纵轴。 注意此图的横轴是一个分类变量。 width=指定坐标轴粗细。 label=none表示没有轴标签(变量名)。 major=后面的括号中给出一些粗刻度线的规定, minor=后面的括号中给出一些细刻度线的规定, value=后面的括号中给出一些刻度值的规定。 */ axis1 label=none offset=(5,5) width=3 value=(height=4); axis2 order=(0 to 40000 by 10000) label=none major=(height=1.5) minor=(height=1) width=3 value=(height=4); /* bubble语句的bcolor指定气球颜色, blabel要求标出气球对应的数值, bfont指定显示气球对应数值的字体, bsize指定气球大小倍数, caxis指定坐标轴颜色。 */ proc gplot data=samp.jobs; format dollars dollar9. num comma7.0; bubble dollars*eng=num / haxis=axis1 vaxis=axis2 vminor=1 bcolor=red blabel bfont=swissi bsize=12 caxis=blue; run; quit; /* GPLOT例子: 气泡图,带有第二个纵轴 */ goptions reset=global gunit=pct border cback=white colors=(black blue green red) ftext=swiss ftitle=swissb htitle=6 htext=3; data jobs2; set samp.jobs; yen=dollars*125; run; title1 'Member Profile'; title2 h=4 'Salaries and Number of Member Engineers'; footnote j=r 'GPLAXIS1 '; axis1 offset=(5,5) label=none width=3 value=(h=4); proc gplot data= jobs2; format dollars dollar7. 
num yen comma9.0;
    /* left axis in dollars; BUBBLE2 adds the right axis in yen, whose range
       is the dollar range multiplied by 125 */
    bubble dollars*eng=num / haxis=axis1
                             vaxis=10000 to 40000 by 10000
                             hminor=0 vminor=1
                             blabel bfont=swissi
                             bcolor=red bsize=12
                             caxis=blue;
    bubble2 yen*eng=num / vaxis=1250000 to 5000000 by 1250000
                          vminor=1
                          bcolor=red bsize=12
                          caxis=blue;
run;
quit;
title; footnote;

/******************************************/
/* PROC GCHART examples                   */
/******************************************/
/* GCHART example: histogram */
proc gchart data=samp.gpa;
    vbar gpa;
run;

/* Histogram drawn with PROC UNIVARIATE */
proc univariate data=samp.gpa noprint;
    var GPA;
    histogram;
run;

/* GCHART example: grouped histogram with GROUP= */
proc gchart data=samp.gpa;
    vbar gpa / group=sex;
run;

/* GCHART example: segmenting bars with SUBGROUP=.
   Number of people per age group, each bar split by sex. */
goptions reset=global gunit=pct border cback=white
         colors=(black blue green red)
         ftitle=swissb ftext=swiss htitle=6 htext=3.5;
title1 'Fitness Program Participants';
footnote1 h=3 j=r 'GCHBRMID(a) ';
axis1 label=('Number of People') minor=(number=1) offset=(0,0);
legend1 label=none value=('Women' 'Men');
pattern1 color=cyan;
pattern2 color=blue;
proc gchart data=samp.fitness2;
    hbar age / subgroup=sex legend=legend1
               autoref clipref coutline=black
               raxis=axis1;
run;
    /* second chart in the same procedure: 3D bars at given midpoints */
    footnote h=3 j=r 'GCHBRMID(b) ';
    axis1 order=(0 to 20 by 2) label=('Number of People')
          minor=(number=1) offset=(0,0);
    axis2 label=('Age' j=r 'Group');
    hbar3d age / midpoints=(20 30 40 50)
                 freq freqlabel='Total in Group'
                 subgroup=sex autoref
                 maxis=axis2 raxis=axis1
                 legend=legend1
                 coutline=black cframe=grayaa;
run;
quit;
title; footnote;

/* GCHART example: histogram.
   Bars are grouped by age and segmented by sex (subgroup=sex).
   freq       makes each bar value a frequency.
   freqlabel  sets the label of the bar-value column.
   autoref    draws reference lines.
   maxis=     names a user-defined axis for the midpoint (grouping) axis,
   raxis=     names a user-defined axis for the response (bar value) axis.
   coutline   sets the color of the bar outlines. */
proc gchart data=samp.fitness2;
    axis1 order=(0 to 20 by 2) label=('Number of People')
          minor=(number=1) offset=(0,0);
    axis2 label=('Age ' j=r 'Group');
    hbar3d age / midpoints=(20 30 40 50)
                 freq freqlabel='Total in Group'
                 subgroup=sex autoref
                 maxis=axis2 raxis=axis1
                 coutline=black;
    title1 'Fitness Program Participants';
run;
quit;
title;

/*
GCHART example: group totals with SUMVAR=. Total of sales for each site. */
goptions reset=global gunit=pct border cback=white
         colors=(black red blue green)
         ftitle=swissb ftext=swiss htitle=6 htext=3.5;
title1 'Total Sales';
footnote1 h=3 j=r 'GCHBRSUM(a) ';
pattern1 color=red;
proc gchart data=samp.totals;
    format sales dollar8.;
    hbar site / sumvar=sales;
run;
    /* second chart in the same procedure: vertical 3D bars */
    footnote1 h=3 j=r 'GCHBRSUM(b) ';
    vbar3d site / sumvar=sales coutline=black;
run;
quit;
title; footnote;

/* GCHART example: SUBGROUP, total sales for each SITE, segmented by DEPT */
goptions reset=global gunit=pct border cback=white
         colors=(black red green blue)
         ftitle=swissb ftext=swiss htitle=6 htext=3
         offshadow=(1.5,1.5);
title 'Total Sales by Site';
footnote h=3 j=r 'GCHBRGRP ';
axis1 label=none origin=(24,);
axis2 label=none order=(0 to 100000 by 20000)
      minor=(number=1) offset=(,0);
/* remove the vertical axis
   (axis3 is defined here but not referenced by the VBAR3D statement below) */
axis3 noplane label=none value=none style=0 major=none minor=none;
legend1 label=none shape=bar(3,3)
        cborder=black cblock=gray origin=(24,);
pattern1 color=lipk;
pattern2 color=cyan;
pattern3 color=lime;
proc gchart data=samp.totals;
    format quarter roman.;
    format sales dollar8.;
    vbar3d site / sumvar=sales subgroup=dept
                  inside=subpct outside=sum
                  width=9 space=4
                  cframe=gray
                  maxis=axis1 raxis=axis2
                  coutline=black legend=legend1;
run;
quit;
title; footnote;

/* GCHART example: bars with error bars.
   TYPE=MEAN together with SUMVAR= makes the bar length the group mean;
   ERRORBAR=BARS and CLM=95 draw a confidence interval for each mean. */
goptions reset=global gunit=pct border cback=white
         colors=(black blue green red)
         ftitle=swissb ftext=swiss htitle=5 htext=3.5;
title1 'Average Resting Heart Rate by Age';
footnote h=3 j=r 'GCHERRBR ';
axis1 label=('Heart Rate' j=c 'Error Bar Confidence Limits: 95%')
      minor=(number=1);
axis2 label=('Age' j=r 'Group');
pattern1 color=cyan;
proc gchart data=samp.fitness2;
    hbar age / type=mean
               freqlabel='Number in Group'
               meanlabel='Mean Heart Rate'
               sumvar=heartrate
               errorbar=bars noframe clm=95
               midpoints=(20 30 40 50)
               raxis=axis1 maxis=axis2
               coutline=black;
run;
quit;

/* GCHART example: block chart with the BLOCK statement; suits GROUP= grouping. */
goptions hpos=90 vpos=70;
proc
gchart data=samp.houses;
    block style / group=bedrooms;
run;

/* GCHART example: block chart with SUMVAR= totals. Total SALES for each SITE. */
goptions reset=global gunit=pct border cback=white
         ctext=black colors=(blue green red)
         ftext=swiss ftitle=swissb htitle=6 htext=3.5;
title 'Total Sales';
footnote j=r 'GCHBKSUM ';
proc gchart data=samp.totals;
    format sales dollar8.;
    block site / sumvar=sales;
run;
quit;
title; footnote;

/* GCHART example: block chart with GROUP= and SUBGROUP=;
   SUMVAR= with TYPE=MEAN gives the group means. */
goptions reset=global gunit=pct border cback=white
         colors=(blue green red) ctext=black
         ftitle=swissb ftext=swiss htitle=4 htext=3;
title1 'Average Sales by Department';
footnote1 h=3 j=r 'GCHBKGRP ';
legend1 cborder=black
        label=('Quarter:')
        position=(middle left outside)
        mode=protect
        across=1 ;
proc gchart data=samp.totals;
    format quarter roman.;
    format sales dollar8.;
    /* the labels are set to a hex-00 character so no variable label text shows */
    label site='00'x dept='00'x;
    block site / sumvar=sales type=mean
                 midpoints='Sydney' 'Atlanta'
                 group=dept subgroup=quarter
                 legend=legend1 noheading
                 coutline=black caxis=black;
run;
quit;

/* GCHART example: pie charts (slice value is the frequency) */
proc gchart data=samp.gpa;
    pie sex;
    pie3d sex;
run;
/* same charts with percentages as the slice statistic */
proc gchart data=samp.gpa;
    pie sex / type=percent;
    pie3d sex / type=percent;
run;

/* GCHART example: PIE chart with SUMVAR */
goptions reset=global gunit=pct border cback=white
         colors=(blue green red) ctext=black
         ftitle=swissb ftext=swiss htitle=6 htext=4;
title 'Total Sales';
footnote h=3 j=r 'GCHPISUM(a) ';
proc gchart data=samp.totals;
    format sales dollar8.;
    pie site / sumvar=sales coutline=black;
run;
    footnote h=3 j=r 'GCHPISUM(b) ';
    pie3d site / sumvar=sales coutline=black explode='Paris';
run;
quit;

/* GCHART example: pie charts.
   The first chart has one slice per sex; the statistic is the mean of aerobic.
   fill=solid fills the slices with solid color.
   The second chart is grouped by sex (group=sex) and draws two pies side by
   side; each pie has one slice per value of exercise, and the statistic is
   the mean heart rate.
   discrete  treats exercise as discrete, one slice per value;
   across=2  number of pies placed side by side when grouping;
   ctext     text color;
   explode=4 pulls out the slice where exercise is 4;
   slice=arrow connects each slice to its label with a line. */
title; footnote;
proc gchart data=samp.fitness2;
    pie sex / type=mean fill=solid sumvar=aerobic;
    pie3d exercise / type=mean sumvar=heartrate
                     group=sex discrete
across=2 fill=solid
                     ctext=blue explode=4
                     slice=arrow noheading;
run;
quit;

/* GCHART example: pie chart labels. SUMVAR= names the value each slice
   represents (only one observation per group). */
goptions reset=global gunit=pct border cback=white
         colors=(black blue green red cyan lime gray)
         ftitle=swissb ftext=swiss htitle=6 htext=4;
title 'Sources of Energy, 1995';
footnote h=3 j=r 'GCHLABEL ';
proc gchart data=samp.enprod (where=(year=1995));
    pie engytype / sumvar=produced
                   other=0
                   midpoints='Coal' 'Geotherm' 'Petro' 'Biofuels'
                             'Gas' 'Nuclear' 'Hydro'
                   value=none
                   percent=arrow
                   slice=arrow
                   cfill=cyan
                   noheading;
run;
quit;
title; footnote;

/* GCHART example: fill patterns of a pie chart */
goptions reset=global gunit=pct border cback=white
         colors=(black blue green red)
         ftitle=swissb ftext=swiss htitle=5 htext=4;
title1 'Principle Sources of Energy: 1985';
title2 font=swissb h=4.5 '(Amounts in quadrillion btu)';
footnote h=3 j=r 'GCHLEGND ';
pattern1 color=black;   /* biofuels      */
pattern2 color=blue;    /* coal          */
pattern3 color=green;   /* gas           */
pattern4 color=gray;    /* geothermal    */
pattern5 color=lipk;    /* hydroelectric */
pattern6 color=lime;    /* nuclear       */
pattern7 color=cyan;    /* petro         */
pattern8 color=red;     /* other         */
legend1 label=none
        position=(left middle)
        offset=(4,)
        across=1
        order=('Coal' 'Gas' 'Petro' 'Nuclear' 'Renewable')
        value=(color=black)
        shape=bar(4,4);
proc gchart data=samp.enprod(where=(year=1985));
    pie engytype / sumvar=produced
                   other=5
                   otherlabel='Renewable'
                   descending
                   legend=legend1
                   value=inside
                   ctext=white
                   coutline=black
                   noheading;
run;
quit;
title; footnote;

/* GCHART example: detail pie chart.
   Total sales for each SITE.
   Within each SITE the slice is further divided by DEPT (DETAIL=DEPT).
   DETAIL_PERCENT and DETAIL_SLICE control how the labels and values of the
   detail slices are shown.
   DETAIL_VALUE=NONE omits the sales amount of each detail slice. */
goptions reset=global gunit=pct border cback=white
         ftitle=swissb ftext=swiss htitle=5 htext=2.5;
title1 'Site Sales By Dept (Details)';
footnote1 h=3 j=r 'GCHDTPIE ';
proc gchart data=samp.totals;
    pie site / sumvar=sales
               detail=dept
               detail_percent=best
               detail_value=none
               detail_slice=best
               legend;
run;
quit;
title; footnote;

/* GCHART example: donut chart drawn with the DONUT statement.
用扇区夹角大小代表数值。 主要变量SITE分块,SUBGROUP分层。 数值为SALES总和。 DONUTPCT指定中间空洞的半径比例。 */ goptions reset=global gunit=pct border cback=white colors=(blue green red) ctext=black ftitle=swissb ftext=swiss htitle=6 htext=4; title 'Sales by Site and Department'; footnote h=3 j=r 'GCHSBGRP '; legend1 label=none shape=bar(4,4) position=(middle left) offset=(5,) across=1 mode=share; proc gchart data=samp.totals; donut site / sumvar=sales subgroup=dept noheading donutpct=30 label=('All' justify=center 'Quarters') legend=legend1 coutline=black ctext=black; run; quit; title;footnote; /* GCHART例: 用STAR语句作星图(雷达图)。 用扇骨长度代表数值。 不同SITE的SALES总和。 */ goptions reset=global gunit=pct border cback=white colors=(blue green red) ctext=black ftitle=swissb ftext=swiss htitle=6 htext=4; title 'Total Sales'; footnote h=3 j=r 'GCHSTSUM '; proc gchart data=samp.totals; format sales dollar8.; star site / sumvar=sales; run; quit; title;footnote; /* GCHART例: 离散变量星图。 不同日期的总不合格数。 */ goptions reset=global gunit=pct border cback=white colors=(black blue green red) ftext=swiss ftitle=swissb htext=3.5 htitle=6; title 'Rejected Parts'; footnote h=3 j=r 'GCHDSCRT(a) '; proc gchart data=samp.rejects; format date worddate3.; star date / discrete sumvar=badparts noheading fill=s; run; footnote h=3 j=r 'GCHDSCRT(b) '; star date / discrete sumvar=badparts noconnect noheading coutline=red; run; quit; title;footnote; /* 盒形图例子。 */ data new; set samp.gpa; g = 1; run; proc boxplot data=new; plot gpa * g / boxstyle=schematic; run; proc sort data=new; by sex; proc boxplot data=new; plot gpa * sex / boxstyle=schematic; run; /******************************************/ /* 曲面图和等值线图例子 */ /******************************************/ /* 绘图:曲面图和等值线图 */ data dnorm2; a=2; a2=sqrt(a); r=0.5; det=a*(1-r*r); do x=-3 to 3 by 0.3; do y=-3*a2 to 3*a2 by 0.3*a2; z=1/(2*3.1415926*sqrt(det))*exp(-0.5/det* (a*x*x + y*y - 2*r*a2*x*y)); output; end; end; keep x y z; run; proc g3d data=dnorm2; plot y*x=z; run; proc gcontour data=dnorm2; plot y*x=z / 
nolegend autolabel;
run;
title; footnote;

/* G3D example: the "hat" surface z = sin(sqrt(x*x + y*y)) */
goptions reset=global gunit=pct border cback=white
         colors=(black blue green red)
         ftext=swiss ftitle=swissb htitle=6 htext=4;
data hat;
    do x=-5 to 5 by 0.25;
        do y=-5 to 5 by 0.25;
            z = sin(sqrt(x*x+y*y));
            output;
        end;
    end;
run;
title 'Surface Plot of HAT Data Set';
footnote j=r 'GTDSURFA';
proc g3d data=hat;
    plot y*x=z;
run;
quit;
title; footnote;

/* G3D example: the hat surface, rotated */
title 'Surface Plot of HAT Data Set';
footnote j=r 'GTDROTAT';
proc g3d data=hat;
    plot y*x=z / grid
                 rotate=45
                 ctop=red cbottom=black
                 yticknum=5 zticknum=5
                 zmin=-3 zmax=1;
run;
quit;
title; footnote;

/* G3D example: the hat surface, tilted */
title 'Surface Plot of HAT Data Set';
footnote j=r 'GTDTILT';
proc g3d data=hat;
    plot y*x=z / side tilt=15;
run;
quit;
title; footnote;

/* G3D example: 3D scatter plot of the IRIS data */
title1 'Iris Species Classification';
title2 'Physical Measurement';
title3 'Source: Fisher (1936) Iris Data';
footnote1 j=l ' Petallen: Petal Length in mm.'
          j=r 'Sepallen: Sepal Length in mm. ';
footnote2 j=l ' Petalwid: Petal Width in mm.'
          j=r 'Sepal Width not shown ';
footnote3 j=r 'GTDSCATR';
proc g3d data=samp.iris;
    scatter petallen*petalwid=sepallen;
run;
quit;
title; footnote;

/* G3D example: 3D scatter plot of the IRIS data with a different symbol
   and color per species. SHAPEVAL and COLORVAL are filled in from SPECIES. */
data iris2;
    set samp.iris;
    length species $12. colorval $8. shapeval $8.;
    if species='Setosa' then do;
        shapeval='club';
        colorval='blue';
    end;
    else if species='Versicolor' then do;
        shapeval='diamond';
        colorval='red';
    end;
    else if species='Virginica' then do;
        shapeval='spade';
        colorval='green';
    end;
run;
title1 'Iris Species Classification';
title2 'Physical Measurement';
title3 'Source: Fisher (1936) Iris Data';
footnote1 j=l ' Petallen: Petal Length in mm.'
          j=r 'Petalwid: Petal Width in mm. ';
footnote2 j=l ' Sepallen: Sepal Length in mm.'
j=r 'Sepal Width not shown ';
footnote3 j=r 'GTDSHAPE(a)';
/* COLOR= and SHAPE= take their values from the variables built in IRIS2;
   the NOTE statements write a color-coded species key on the graph. */
proc g3d data=iris2;
    scatter petallen*petalwid=sepallen / color=colorval shape=shapeval;
    note;
    note j=r 'Species: ' c=green 'Virginica '
         j=r c=red 'Versicolor '
         j=r c=blue 'Setosa ';
run;
    title3;
    footnote1 j=l ' Source: Fisher (1936) Iris Data';
    footnote2 j=r 'GTDSHAPE(b)';
proc g3d data=iris2;
    scatter petallen*petalwid=sepallen / noneedle grid
                                         color=colorval shape=shapeval;
    label petallen='Petal Length'
          petalwid='Petal Width'
          sepallen='Sepal Length';
run;
quit;
title; footnote;

/* Plot: Chinese fonts */
goptions ftext="宋体" ftitle="黑体" htitle=6 pct htext=3 pct;
proc gplot data=samp.class;
    title "试验SAS图形的汉字功能";
    symbol i=none v=square;
    plot weight * height;
    label weight = "体重" height="身高";
run;

/******************************************/
/* Examples of Analyst-generated programs */
/******************************************/
/* Random sampling from a data set, without replacement.
   The view _TMPV presents INDATA in random order; the first &NSUB rows of
   that view form the sample. The DATA step must read the shuffled view
   (the original read INDATA itself, i.e. just its first rows, and used
   OBS=nsub, which is not a valid OBS= value).
   INDATA is a placeholder for the input data set; NSUB is the sample size. */
%let nsub = 10;
proc sql;
    create view _tmpv as
        select *, ranuni(0) as _random
        from indata
        order by calculated _random;
quit;
data sampled;
    set _tmpv(obs=&nsub drop=_random);
run;
proc datasets lib=work noprint;
    delete _tmpv / memtype=view;
quit;

/* Another way to sample without replacement: keep each row with a given
   probability (about 10% of the rows here). */
data sampled;
    set indata;
    if ranuni(0) le 0.1 then output;
run;

/* Sampling with replacement. The NOBS= option of SET names a temporary
   variable holding the number of rows; it is assigned at compile time.
   POINT= reads the observation with the given number. The DATA step needs
   a STOP statement at the end because direct access never hits end of file. */
data sampled;
    do i=1 to 10;
        point = ceil(ranuni(0)*nobs);
        set indata point=point nobs=nobs;
        output;
    end;
    stop;
run;

/******************************************/
/* PROC DATASETS examples                 */
/******************************************/
/* PROC DATASETS: list the data sets of a library.
   NODS prints only the directory, not the contents of each data set
   (the original had the misspelling "wNODS"). */
proc datasets library=samp memtype=data nolist;
    contents data=_all_ details nods;
run; quit;

/* PROC SQL: store the list of data sets in a data set */
proc sql;
    create table sampdir as
        select *
        from dictionary.tables
        where libname='SAMP';
quit;

/* PROC DATASETS: show and save the variable list of one data set */
proc datasets library=samp memtype=data nolist;
    contents data=class out=vlist;
run; quit;

/* PROC CONTENTS: list the variables of a data set */
proc contents data=samp.class;
run;
proc contents data=samp.class out=vlist;
run;

/* PROC DATASETS: renaming data sets */
proc datasets library=samp memtype=data nolist;
    change c9501f=c9501nv c9501m=c9501nan;
run; quit;
/* rename them back */
proc datasets library=samp memtype=data nolist;
    change c9501nv=c9501f c9501nan=c9501m;
run; quit;

/* PROC DATASETS: deleting data sets.
   Two data sets are first copied to WORK and a view is created so that
   there is something to delete. */
proc datasets nolist;
    copy out=work in=samp;
    select class gpa;
run; quit;
/* PROC SQL runs each statement immediately, so no RUN statement is needed
   (the original RUN was ignored with a note in the log). */
proc sql;
    create view c9501v as
        select *
        from samp.c9501 a, samp.bkmoney b
        where a.name=b.name;
quit;
proc datasets library=work nolist;
    delete class gpa;
    delete c9501v / memtype=view;
run; quit;

/* PROC DATASETS: copy a whole library */
proc datasets nolist;
    copy out=work in=samp;
run; quit;

/* PROC DATASETS: copy selected data sets */
proc datasets nolist;
    copy out=work in=samp;
    select c9501 class gpa;
run; quit;

/* PROC DATASETS: append rows to the end of a data set */
proc datasets library=work memtype=data nolist;
    append base=c9501fm data=samp.c9501m;
    append base=c9501fm data=samp.c9501f;
run; quit;

/* PROC DATASETS: change variable attributes */
proc datasets library=work nolist;
    copy out=work in=samp;
    select class;
run;
    modify class;
    attrib name label='姓名' weight format=8.2;
quit;
proc print data=class label;
run;

/* PROC DATASETS: rename variables */
proc datasets library=work nolist;
    copy out=work in=samp;
    select class;
run;
    modify class;
    rename height=h weight=w;
quit;
proc print data=class; run;

/******************************************/
/* PROC RANK examples                     */
/******************************************/
/* Rank from low to high; ties get the average rank */
proc rank data=samp.class out=rc;
    var age;
    ranks agerank;
run;
/* Rank from high to low; ties get the lowest rank */
proc rank data=samp.class out=rc descending ties=low;
    var age;
    ranks agerank;
run;

/******************************************/
/* PROC STANDARD example                  */
/******************************************/
/* Standardize AGE to mean 0 and standard deviation 1 */
proc standard data=samp.class out=sc mean=0 std=1;
    var age;
run;

/******************************************/
/* PROC FORMAT examples                   */
/******************************************/
/* Numeric format: codes 1 and 2 are shown as labels */
proc format;
    value sexotf 1='男' 2='女';
run;
/* Plain list input reads one observation per data line, and the data lines
   must start on the line after CARDS, so each value is on its own line. */
data sexd;
    input sex;
    format sex sexotf.;
    cards;
1
2
;
run;
proc print; run;

/* Character format */
proc format;
    value $sexf 'F'='女' 'M'='男';
run;
data sexd;
    input sex
$;
    format sex $sexf.;
    cards;
F
M
;
run;
proc print; run;

/* Character informat: the text read is converted to another text value.
   In this and the next example each observation is on its own data line. */
proc format;
    invalue $sexotinf 1='男' 2='女';
run;
data sexd;
    input sex $sexotinf. ;
    cards;
1
2
;
run;
proc print; run;

/* Numeric informat: the text read is converted to a number */
proc format;
    invalue sexinf '男'=1 '女'=2;
run;
data sexd;
    input sex sexinf. ;
    cards;
男
女
;
run;
proc print; run;

/* Informat with character ranges, numeric ranges, a missing value and
   OTHER=_ERROR_ for everything else */
proc format;
    invalue trial 'A'-'M'=1
                  'N'-'Z'=2
                  1-999,1001-1999,2001-2999=3
                  9999=.
                  other=_error_;
run;

/* List the formats stored in the FORMATS catalog of the WORK library */
proc format fmtlib;
run;

/* Write the formats stored in WORK.FORMATS to the data set TFM */
proc format cntlout=tfm;
run;
proc print; run;

/******************************************/
/* PROC REPORT examples                   */
/******************************************/
/* The BUSINESS data set is used as an example. It contains two
 * classification variables, country and industry, plus company name,
 * number of employees, sales, profits and other variables. */

/* Show all variables, one row per observation. */
proc report data=samp.business(obs=20) nowindows;
run;
/* "DATA==" in the original was a syntax error; a single "=" is required */
proc report data=samp.c9501 nowindows;
run;
proc report data=samp.class nowindows;
run;

/* When only numeric variables are shown, the default output is their sum
   rather than the individual observations. */
proc report data=samp.business(keep=employs sales profits) nowindows;
run;
proc report data=samp.c9501(keep=math chinese) nowindows;
run;
proc report data=samp.class(keep=age height weight) nowindows;
run;

/* Use the COLUMN statement to choose the variables to show. */
proc report data=samp.business(obs=20) nowindows;
    column company industry profits;
run;
proc report data=samp.c9501 nowindows;
    column name sex math chinese;
run;
proc report data=samp.class nowindows;
    column name sex age height weight;
run;

/* Use the ORDER option of DEFINE to name ordering variables.
   With several ordering variables the nesting follows their order in the
   COLUMN statement, or the order in the data set if there is no COLUMN. */
proc report data=samp.class nowindows;
    column sex age name height weight;
    define sex / order;
    define age / order;
run;

/* ORDER= sets the rule used to order the values of a variable:
   DATA       order of first appearance in the data,
   FORMATTED  order of the formatted values (the default),
   FREQ       order by frequency, from least to most frequent,
   INTERNAL   order of the internal (unformatted) values.
   Add DESCENDING to reverse the order. */
proc report data=samp.class nowindows;
    column sex age name height weight;
    define sex / order order=freq descending;
    define age / order descending;
run;

/* Use the GROUP option of DEFINE for grouped summaries; the other numeric
   variables are summed (the default statistic).
   No character variable may appear besides the group variables, otherwise
   the rows cannot be collapsed. */
proc report data=samp.class nowindows;
column sex age height weight;
    define sex / group;
run;
proc report data=samp.class nowindows;
    column sex age height weight;
    define sex / group;
    define age / group;
run;

/* Grouped summary with the GROUP option of DEFINE; the ANALYSIS MEAN option
   of DEFINE requests the within-group mean of the other variables.
   Available statistics include N, NMISS, MEAN, STD, SUM, VAR, CV,
   PCTN, PCTSUM, MEDIAN, Q1, Q3, P1, P5, P10, P90, P95, P99 and others. */
proc report data=samp.class nowindows;
    title 'Mean height and weight in sex groups';
    column sex height weight;
    define sex    / group;
    define height / analysis mean;
    define weight / analysis mean;
run;
proc report data=samp.class nowindows;
    column sex age height weight;
    define sex    / group;
    define age    / group;
    define height / analysis mean;
    define weight / analysis mean;
run;

/* The group variables of a grouped summary can be given an order too.
   Without one, the groups are ordered by their formatted values. */
proc report data=samp.class nowindows;
    column sex age height weight;
    define sex    / group order=freq descending;
    define age    / group;
    define height / analysis mean;
    define weight / analysis mean;
run;

/* Defining a new (computed) variable. */
proc report data=samp.class nowindows;
    column name height weight ratio;
    define name   / order;
    define height / display;
    define weight / display;
    define ratio  / computed format=6.2;
    compute ratio;
        ratio = weight / height;
    endcomp;
run;

/* Writing the report to a data set (OUT=) */
proc report data=samp.class out=repd nowindows;
    column sex age height weight;
    define sex    / group;
    define age    / group;
    define height / analysis mean;
    define weight / analysis mean;
run;
proc print; run;

/* FORMAT= in the DEFINE statement sets the output format of the column. */
proc report data=samp.class nowindows;
    column sex age height weight;
    define sex    / group;
    define age    / group;
    define height / analysis mean format=8.2;
    define weight / analysis mean format=8.2;
run;

/* SPACING= on the PROC REPORT statement sets the number of blanks between
   columns. SPACING= on a DEFINE statement sets the number of blanks that
   separate this column from the item to its left.
   COLWIDTH= on the PROC REPORT statement sets a common column width.
   WIDTH= on a DEFINE statement sets the width of that column.
   The CENTER, LEFT and RIGHT options of DEFINE request centered, left and
   right alignment. */
proc report data=samp.class nowindows spacing=5;
    column name sex age;
    define name / width=10 right;
    define sex  / width=2;
    define age  / width=2;
run;

/*
DEFINE语句中的字符串选项表示列标题。 DEFINE语句的选项中可以写字符串作为列标题 (缺省使用变量标签或变量名作为列标题), 列标题中用'/'表示换行,或者把列标题用 两个字符串表示也可以在上下两行显示。 可以用PROC REPORT的SPLIT=选项指定在 列标题字符串中表示换行的字符。 */ PROC REPORT DATA=samp.class NOWINDOWS ; COLUMN sex height; DEFINE sex / GROUP '性 别' WIDTH=2; DEFINE height / '身高' ANALYSIS MEAN FORMAT=8.2; RUN; PROC REPORT DATA=samp.class NOWINDOWS ; COLUMN sex height weight; DEFINE sex / GROUP '性 别' WIDTH=2; DEFINE height / '身高/平均值' ANALYSIS MEAN FORMAT=8.2; DEFINE weight / '体重' '平均值' ANALYSIS MEAN FORMAT=8.2; RUN; /* PROC REPORT的HEADLINE选项在列标题行下划线, HEADSKIP选项在列标题行下空行。 */ PROC REPORT DATA=samp.class NOWINDOWS HEADLINE HEADSKIP; COLUMN sex height; DEFINE sex / GROUP '性 别' WIDTH=2; DEFINE height / '身高' ANALYSIS MEAN FORMAT=8.2; RUN; proc report data=samp.class nowindows headline headskip; title 'Mean height and weight by sex'; column sex height weight; define sex / '性/别' group width=2; define height / '平均' '身高' analysis mean; define weight / '平均' '体重' analysis mean; run; /* 在COLUMN语句中用圆括号把一个字符串和多个输出项组合在一起, 可以使得多个输出项上方共享此字符串作为合并栏目标题。 */ PROC REPORT DATA=samp.class NOWINDOWS; COLUMN sex age ('平均值' height weight); DEFINE sex / GROUP; DEFINE age / GROUP; DEFINE height / ANALYSIS MEAN; DEFINE weight / ANALYSIS MEAN; RUN; /* 用别名的办法对同一变量计算多种统计量。 */ PROC REPORT DATA=samp.class NOWINDOWS; COLUMN sex height height=htmin height=htmax; DEFINE sex / '性别' GROUP WIDTH=4; DEFINE height / '平均身高' ANALYSIS MEAN WIDTH=10 FORMAT=6.2; DEFINE htmin / '最低' ANALYSIS MIN FORMAT=5.1; DEFINE htmax / '最高' ANALYSIS MAX FORMAT=5.1; RUN; /* 用COLUMN语句逗号格式指定多个统计量。 */ PROC REPORT DATA=samp.class NOWINDOWS; COLUMN sex height height,(MEAN MIN MAX); DEFINE sex / '性别' ORDER WIDTH=4; DEFINE height / '身高' ANALYSIS SUM FORMAT=6.2; COMPUTE AFTER sex; LINE @21 '最低身高:' height.min 5.1 +3 '最高身高:' height.max 5.1; LINE ' '; ENDCOMP; RUN; /* 在DEFINE中指定ACROSS变量作为列分组变量,每个值占一列, 统计该值的频数。 特殊变量N代表组内的非缺失观测个数。 */ PROC REPORT DATA=samp.class NOWINDOWS; COLUMN sex N; DEFINE sex / ACCROSS WIDTH=2; DEFINE N / '总计'; RUN; PROC REPORT 
data=samp.class nowindows;
    column age sex N;
    define age / group;
    define sex / across width=2;
    define N   / '总计';
run;

/* For a column grouping variable declared with the ACROSS option of DEFINE,
   the COLUMN statement can follow the variable with a comma and one or more
   analysis variables to summarize within each column group; several
   variables are enclosed in parentheses.
   The across variable and the analysis variables may also be written in the
   opposite order in the COLUMN statement; the layout differs slightly. */
proc report data=samp.class nowindows;
    column sex, (height weight);
    define sex    / across width=2;
    define height / '平均身高' analysis mean format=8.2;
    define weight / '平均体重' analysis mean format=8.2;
run;
proc report data=samp.class nowindows;
    column (height weight), sex;
    define sex    / across width=2;
    define height / '平均身高' analysis mean format=8.2;
    define weight / '平均体重' analysis mean format=8.2;
run;

/* In a compute block, _C1_, _C2_, ... stand for report columns by position. */
proc report data=samp.class nowindows;
    column sex, (height weight) ratio;
    define sex    / across width=2;
    define height / '平均身高' analysis mean format=8.2;
    define weight / '平均体重' analysis mean format=8.2;
    define ratio  / '男女平均身高比' computed format=6.2;
    compute ratio;
        /* third report column divided by the first report column */
        ratio = _C3_ / _C1_;
    endcomp;
run;

/* BREAK and RBREAK statements.
   BREAK AFTER names a group variable and shows extra output at the end of
   each group of that variable; BREAK BEFORE shows it at the start of each
   group.
   RBREAK AFTER controls extra output at the very end of the report,
   RBREAK BEFORE at the very beginning.
   SUMMARIZE shows the summary statistics of the analysis variables,
   SKIP      leaves a blank line after the extra output,
   OL        draws a line above the extra output row,
   DOL       draws a double line above the extra output row,
   SUPPRESS  hides the group value on the extra output row. */
proc report data=samp.class nowindows;
    column sex age height weight;
    define sex    / group;
    define age    / group;
    define height / analysis mean format=8.2;
    define weight / analysis mean format=8.2;
    break after sex / summarize skip ol;
    rbreak after / summarize skip dol;
run;

/* Use COMPUTE AFTER to customize the per-group and whole-report summaries. */
proc report data=samp.class nowindows;
    column sex name height weight;
    define sex    / order;
    define name   / display;
    define height / analysis mean format=8.2;
    define weight / analysis mean format=8.2;
    compute after sex;
        line @11 '平均身高(' sex $2. '): ' @25 height.mean 6.2 @41 '平均体重(' sex $2.
'): ' @55 weight.mean 6.2; ENDCOMP; COMPUTE AFTER; LINE ''; LINE @11 '总平均身高: ' height.mean 6.2 @41 '总平均体重: ' weight.mean 6.2; ENDCOMP; RUN; /* 用NOPRINT选项指定需要在计算代码块中使用的变量或别名。 */ PROC REPORT DATA=samp.class NOWINDOWS; COLUMN sex height height=htmin height=htmax; DEFINE sex / '性别' ORDER WIDTH=4; DEFINE height / '身高' ANALYSIS SUM FORMAT=6.2; DEFINE htmin / '最低' ANALYSIS MIN NOPRINT; DEFINE htmax / '最高' ANALYSIS MAX NOPRINT; COMPUTE AFTER sex; LINE @21 '最低身高:' htmin 5.1 +3 '最高身高:' htmax 5.1; LINE ' '; ENDCOMP; RUN; /* PROC REPORT手册例1. 仅使用COLUMN指定输出列, 用RBREAK指定缺省全集概括。 用FORMAT语句规定列输出格式。 NOWD是NOWINDOWS的简写。 */ data grocery; input Sector $ Manager $ Department $ Sales @@; datalines; se 1 np1 50 se 1 p1 100 se 1 np2 120 se 1 p2 80 se 2 np1 40 se 2 p1 300 se 2 np2 220 se 2 p2 70 nw 3 np1 60 nw 3 p1 600 nw 3 np2 420 nw 3 p2 30 nw 4 np1 45 nw 4 p1 250 nw 4 np2 230 nw 4 p2 73 nw 9 np1 45 nw 9 p1 205 nw 9 np2 420 nw 9 p2 76 sw 5 np1 53 sw 5 p1 130 sw 5 np2 120 sw 5 p2 50 sw 6 np1 40 sw 6 p1 350 sw 6 np2 225 sw 6 p2 80 ne 7 np1 90 ne 7 p1 190 ne 7 np2 420 ne 7 p2 86 ne 8 np1 200 ne 8 p1 300 ne 8 np2 420 ne 8 p2 125 ; run; proc format; value $sctrfmt 'se' = 'Southeast' 'ne' = 'Northeast' 'nw' = 'Northwest' 'sw' = 'Southwest'; value $mgrfmt '1' = 'Smith' '2' = 'Jones' '3' = 'Reveiz' '4' = 'Brown' '5' = 'Taylor' '6' = 'Adams' '7' = 'Alomar' '8' = 'Andrews' '9' = 'Pelfrey'; value $deptfmt 'np1' = 'Paper' 'np2' = 'Canned' 'p1' = 'Meat/Dairy' 'p2' = 'Produce'; run; proc report data=grocery nowd; column manager department sales; rbreak after / dol summarize; where sector='se'; format manager $mgrfmt. department $deptfmt. sales dollar11.2; title 'Sales for the Southeast Sector'; title2 "for &sysdate"; run; /* PROC REPORT手册例2. 
manager作为ORDER变量,以格式化值次序排列。 department作为ORDER变量,以内部编码次序排列。 用BREAK AFTER manage对每一经理组进行缺省概括。 在COMPUTE AFTER中用LINE语句在全表最后加定制概括行。 */ proc report data=grocery nowd colwidth=10 spacing=5 headline headskip; column manager department sales; define manager / order order=formatted format=$mgrfmt.; define department / order order=internal format=$deptfmt.; define sales / analysis sum format=dollar7.2; break after manager / ol summarize skip; compute after; line 'Total sales for these stores were: ' sales.sum dollar9.2; endcomp; where sector='se'; title 'Sales for the Southeast Sector'; run; /* PROC REPORT手册例3. 在COLUMN语句中用sales=salesmin和sales=salesmax 定义了两个别名,这样可以对同一变量在不同输出列计算不同的统计量。 最后salesmin和salesmax两列用了DEFINE语句NOPRINT选项不显示出来 (每一数据行行中的salesmin和salesmax实际是对一个值求最小和最大, 即使显示也等于sales本身,但是需要指定这样的统计量才能在概括行中 对其求最小或最大), 仅在自定义概括行中使用其值。 在LINE语句中,用53*'-'这样的办法输出连续多个相同字符。 */ proc report data=grocery nowd headline headskip; column manager department sales sales=salesmin sales=salesmax; define manager / order order=formatted format=$mgrfmt. 'Manager'; define department / order order=internal format=$deptfmt. 'Department'; define sales / analysis sum format=dollar7.2 'Sales'; define salesmin / analysis min noprint; define salesmax / analysis max noprint; compute after; line ' '; line @7 53*'-'; line @7 '| Departmental sales ranged from' salesmin dollar7.2 +1 'to' +1 salesmax dollar7.2 '. |'; line @7 53*'-'; endcomp; where sector='se'; title 'Sales for the Southeast Sector'; title2 "for &sysdate"; run; /* PROC REPORT手册例4. 分组统计汇总。 用LINE语句输出全表后定制概括,sales.sum表示sales的和。 用CALL DEFINE语句仅对概括行修改了sales列的输出格式。 */ proc report data=grocery nowd headline headskip; column sector manager sales; define sector / group format=$sctrfmt. 'Sector'; define manager / group format=$mgrfmt. 
'Manager'; define sales / analysis sum format=comma10.2 'Sales'; break after sector / ol summarize suppress skip; compute after; line 'Combined sales for the northern sectors were' sales.sum dollar9.2 '.'; endcomp; compute sales; if _break_ ne ' ' then call define(_col_,"format","dollar11.2"); endcomp; where sector contains 'n'; title 'Sales Figures for Northern Sectors'; run; /* PROC REPORT手册例5. department指定为ACROSS变量,作为列维分类变量, 并在COLUMN语句用逗号与sales相联使得department 的每类值下面都计算sales的总和。 perish是COMPUTED变量,在计算代码块中定义如何计算。 为了访问列维分类后的列,用_C3_, _C4_这样的名字。 */ proc report data=grocery nowd headline headskip split='*'; column sector manager department,sales perish; define sector / group format=$sctrfmt. 'Sector' ''; define manager / group format=$mgrfmt. 'Manager* '; define department / across format=$deptfmt. '_Department_'; define sales / analysis sum format=dollar11.2 ' '; define perish / computed format=dollar11.2 'Perishable*Total'; break after manager / skip; compute perish; perish=sum(_c3_, _c4_); endcomp; compute after; line @4 57*'-'; line @4 '| Combined sales for meat and dairy : ' @46 _c3_ dollar11.2 ' |'; line @4 '| Combined sales for produce : ' @46 _c4_ dollar11.2 ' |'; line @4 '|' @60 '|'; line @4 '| Combined sales for all perishables: ' @46 _c5_ dollar11.2 ' |'; line @4 57*'-'; endcomp; where sector contains 'n' and (department='p1' or department='p2'); title 'Sales Figures for Perishables in Northern Sectors'; run; /* PROC REPORT手册例6. 在COLUMN语句中用逗号格式对一个变量计算多个统计量。 */ proc report data=grocery nowd headline headskip ls=66 ps=18; column sector manager sales,(Sum Min Max Range Mean Std); define manager / group format=$mgrfmt. id; define sector / group format=$sctrfmt.; define sales / format=dollar11.2 ; title 'Sales Statistics for All Sectors'; run; /* PROC REPORT手册例7. 
用OUTREPT=指定一个输出catalog项目保存表定义。 用REPORT=指定表定义。 NAMED选项表示数据中用``变量名=变量值''方法显示, 不显示列标题。 WRAP要求当行超长是直接延长到下一行,而不是拆分成两张表。 */ proc report data=grocery nowd named wrap ls=64 ps=36 outrept=sasuser.reports.namewrap; column sector manager department sales; define sector / format=$sctrfmt.; define manager / format=$mgrfmt.; define department / format=$deptfmt.; define sales / format=dollar11.2; where manager='1'; title "Sales Figures for Smith on &sysdate"; run; proc report data=grocery report=sasuser.reports.namewrap nowd; where sector='sw'; title "Sales Figures for the Southwest Sector on &sysdate"; run; /* PROC REPORT手册例8. 分栏(左右并列)显示表以节省空间。 用PANELS=指定运行最多左右并排表格数。 用PSPACE指定左右并排间隙大小。 仅对LISTINGS输出目标有效。 */ proc report data=grocery nowd headline formchar(2)='~' panels=99 pspace=6 ls=64 ps=18; column manager department sales; define manager / order order=formatted format=$mgrfmt.; define department / order order=internal format=$deptfmt.; define sales / format=dollar7.2; break after manager / skip; where sector='nw' or sector='sw'; title 'Sales for the Western Sectors'; run; /* PROC REPORT手册例9. BREAK AFTER MANAGER的PAGE选项要求每个经理占一页。 COMPUTE BEFORE _PAGE_在每页开头输出自定义内容。 COMPUTE AFTER manager对每个经理输出了自定义的业绩评价。 */ proc report data=grocery nowd headline headskip; title 'Sales for Individual Stores'; column sector manager department sales Profit; define sector / group noprint; define manager / group noprint; define profit / computed format=dollar11.2; define sales / analysis sum format=dollar11.2; define department / group format=$deptfmt.; compute profit; if department='np1' or department='np2' then profit=0.4*sales.sum; else profit=0.25*sales.sum; endcomp; compute before _page_ / left; line sector $sctrfmt. 
' Sector'; line 'Store managed by ' manager $mgrfmt.; line ' '; line ' '; line ' '; endcomp; break after manager / ol summarize page; compute after manager; length text $ 35; if sales.sum lt 500 then text='Sales are below the target region.'; else if sales.sum ge 500 and sales.sum lt 1000 then text='Sales are in the target region.'; else if sales.sum ge 1000 then text='Sales exceeded goal!'; line ' '; line text $35.; endcomp; run; /* PROC REPORT手册例10. 统计量名PCTSUM表示当前格占总和的百分比。 COLUMN语句中用括号把一个引导字符串和若干变量组合起来表示多栏标题。 DEFINE语句的FLOW选项允许字符串值在当前列延长到下一行。 */ proc report data=grocery nowd headline; title; column ('Individual Store Sales as a Percent of All Sales' sector manager sales,(sum pctsum) comment); define manager / group format=$mgrfmt.; define sector / group format=$sctrfmt.; define sales / format=dollar11.2 ''; define sum / format=dollar9.2 'Total Sales'; define pctsum / 'Percent of Sales' format=percent6. width=8; define comment / computed width=20 '' flow; compute comment / char length=40; if sales.pctsum gt .15 and _break_ = ' ' then comment='Sales substantially above expectations.'; else comment=' '; endcomp; rbreak after / ol summarize; run; /* PROC REPORT手册例11. 缺省情况下缺失值不列入表内。 加MISSING选项后,分组变量和排序变量的缺失值也作为单独类别。 */ data grocmiss; input Sector $ Manager $ Department $ Sales @@; datalines; se 1 np1 50 . 1 p1 100 se . np2 120 se 1 p2 80 se 2 np1 40 se 2 p1 300 se 2 np2 220 se 2 p2 70 nw 3 np1 60 nw 3 p1 600 . 3 np2 420 nw 3 p2 30 nw 4 np1 45 nw 4 p1 250 nw 4 np2 230 nw 4 p2 73 nw 9 np1 45 nw 9 p1 205 nw 9 np2 420 nw 9 p2 76 sw 5 np1 53 sw 5 p1 130 sw 5 np2 120 sw 5 p2 50 . . np1 40 sw 6 p1 350 sw 6 np2 225 sw 6 p2 80 ne 7 np1 90 ne . 
p1 190 ne 7 np2 420 ne 7 p2 86 ne 8 np1 200 ne 8 p1 300 ne 8 np2 420 ne 8 p2 125 ; proc report data=grocmiss nowd headline; column sector manager N sales; define sector / group format=$sctrfmt.; define manager / group format=$mgrfmt.; define sales / format=dollar9.2; rbreak after / dol summarize; title 'Summary Report for All Sectors and Managers'; run; proc report data=grocmiss nowd headline missing; column sector manager N sales; define sector / group format=$sctrfmt.; define manager / group format=$mgrfmt.; define sales / format=dollar9.2; rbreak after / dol summarize; run; /* PROC REPORT手册例12. 用OUT=选项把显示结果保存为数据集。 */ proc report data=grocery nowd out=temp( where=(sales gt 1000) ); column manager sales; define manager / group noprint; define sales / analysis sum noprint; run; proc report data=temp box nowd; column manager sales; define manager / group format=$mgrfmt.; define sales / analysis sum format=dollar11.2; title 'Managers with Daily Sales'; title2 'of over'; title3 'One Thousand Dollars'; run; /* PROC REPORT手册例13. 用OUT=输出数据集,可以把计算的新变量输出。 */ title; proc report data=grocery nowd out=profit; column sector manager department sales Profit; define profit / computed; /* Compute values for Profit. */ compute profit; if department='np1' or department='np2' then profit=0.4*sales.sum; else profit=0.25*sales.sum; endcomp; run; proc chart data=profit; block sector / sumvar=profit; format sector $sctrfmt.; format profit dollar7.2; title 'Sum of Profit by Sector'; run; /* PROC REPORT手册例14. 分组变量(GROUP)或列维分组变量(ACROSS)是按照格式化显示值分组的。 所以可以定义输出格式把若干类显示成相同值,于是分到相同组。 */ proc format; value $perish 'p1', 'p2'='Perishable' 'np1','np2'='Nonperishable'; run; proc report data=grocery nowd headline headskip; column manager department,sales sales; define manager / group order=formatted format=$mgrfmt.; define department / across order=formatted format=$perish. 
''; define sales / analysis sum format=dollar9.2 width=13; compute after; line ' '; line 'Total sales for these stores were: ' sales.sum dollar9.2; endcomp; title 'Sales Summary for All Stores'; run; /* PROC REPORT手册例15. 在PROC REPORT语句中指定表的各部分的统一样式。 */ ods html body='test15.htm'; ods pdf file='test15.pdf'; ods rtf file='test15.rtf'; proc report data=grocery nowd headline headskip style(report)=[cellspacing=5 borderwidth=10 bordercolor=blue] style(header)=[foreground=yellow font_style=italic font_size=6] style(column)=[foreground=moderate brown font_face=helvetica font_size=4] style(lines)=[foreground=white background=black font_style=italic font_weight=bold font_size=5] style(summary)=[foreground=cx3e3d73 background=cxaeadd9 font_face=helvetica font_size=3 just=r]; column manager department sales; define manager / order order=formatted format=$mgrfmt. 'Manager'; define department / order order=internal format=$deptfmt. 'Department'; break after manager / summarize; compute after manager; line 'Subtotal for ' manager $mgrfmt. 'is ' sales.sum dollar7.2 '.'; endcomp; compute after; line 'Total for all departments is: ' sales.sum dollar7.2 '.'; endcomp; where sector='se'; title 'Sales for the Southeast Sector'; run; ods html close; ods pdf close; ods rtf close; /* PROC REPORT手册例16. 
除了PROC REPORT语句中定义统一样式之外, 还在DEFINE语句中定义单独的列样式或列标题样式, 在计算代码块中用CALL DEFINE有条件地给单元格施加不同样式。 */ ods html body='test16.htm'; ods pdf file='test16.pdf'; ods rtf file='test16.rtf'; proc report data=grocery nowd headline headskip style(report)=[cellspacing=5 borderwidth=10 bordercolor=blue] style(header)=[foreground=yellow font_style=italic font_size=6] style(column)=[foreground=moderate brown font_face=helvetica font_size=4] style(lines)=[foreground=white background=black font_style=italic font_weight=bold font_size=5] style(summary)=[foreground=cx3e3d73 background=cxaeadd9 font_face=helvetica font_size=3 just=r]; column manager department sales; define manager / order order=formatted format=$mgrfmt. 'Manager' style(header)=[foreground=white background=black]; define department / order order=internal format=$deptfmt. 'Department' style(column)=[font_style=italic]; break after manager / summarize; compute after manager / style=[font_style=roman font_size=3 font_weight=bold background=white foreground=black]; line 'Subtotal for ' manager $mgrfmt. 
'is ' sales.sum dollar7.2 '.'; endcomp; compute sales; if sales.sum>100 and _break_=' ' then call define(_col_, "style", "style=[background=yellow font_face=helvetica font_weight=bold]"); endcomp; compute after; line 'Total for all departments is: ' sales.sum dollar7.2 '.'; endcomp; where sector='se'; title 'Sales for the Southeast Sector'; run; ods html close; ods pdf close; ods rtf close; /************************************************* ************************************************** ** 第四章 ** ************************************************** *************************************************/ title 'One-Sample t Test'; data casetime; input time @@; datalines; 43 90 84 87 116 95 86 99 93 92 121 71 66 98 79 102 60 112 105 98 ; run; proc ttest data=casetime h0=80 alpha=0.1; var time; run; title; data tran; array y(3); do i=1 to 10; do j=1 to 3; y(j) = uniform(0); end; output; end; drop j; run; /* 已知方差的Z检验和Wald检验 */ proc means data=samp.class mean std n; var height; output out=_tmp_1 mean=mu std=sigma n=n; run; /* Z test, sigma known */ data _null_; set _tmp_1; file print; mu0 = 65; sigma0 = 5; z = (mu - mu0)/(sigma0 / sqrt(n)); pvalue = 2*(1 - cdf('normal', abs(z))); put 'Z: ' Z 12.4 ' Pr>|Z|: ' pvalue PVALUE.; run; /* Wald Test, use the estimated sigma as true sigma */ data _null_; set _tmp_1; file print; mu0 = 65; sigma0 = sigma; /* Differ with the previous program here */ z = (mu - mu0)/(sigma0 / sqrt(n)); pvalue = 2*(1 - cdf('normal', abs(z))); put 'Z: ' Z 12.4 ' Pr>|Z|: ' pvalue PVALUE.; run; /* 某类法律案件平均审理时间是否等于80天的单样本t检验。 */ title 'One-Sample t Test'; data time; input time @@; datalines; 43 90 84 87 116 95 86 99 93 92 121 71 66 98 79 102 60 112 105 98 ; run; proc ttest h0=80 alpha=0.1; var time; run; /* 用PROC UNIVARIATE */ data new; set time; y = time - 80; run; proc univariate data=new; var y; run; /* 正态性检验 */ proc univariate data=samp.gpa normal; var gpa; run; /* 两独立样本的t检验 */ proc ttest data=samp.gpa; class sex; var satm; run; /* 
非正态时用NPAR1WAY过程比较两独立样本均值 */ proc npar1way data=samp.gpa wilcoxon; class sex; var gpa; run; /* 用TTEST过程进行成对t检验 */ title 'Paired Comparison'; data pressure; input SBPbefore SBPafter @@; datalines; 120 128 124 131 130 131 118 127 140 132 128 125 140 141 135 137 126 118 130 132 126 129 127 135 ; run; proc ttest data=pressure; paired SBPbefore*SBPafter; run; /* 用PROC UNIVARIATE */ data new; set pressure; y = SBPbefore - SBPafter; run; proc univariate data=new; var y; run; title; /* GPA数据集中SATM与SATV的比较 */ proc ttest data=samp.gpa; paired satm*satv; run; /* 用计算差值的办法进行成对t检验 */ data new; set samp.gpa; dmv = satm - satv; keep dmv; run; proc univariate data=new; var dmv; run; /* 单总体比例的假设检验: 检验GPA数据集中女生比例是否等于0.6, 计算置信区间。 */ proc freq data=samp.gpa; tables sex / binomial(p=0.6); run; /* 单总体比例的假设检验: 检验GPA数据集中男生比例是否等于0.5, 用EXACT BINOMIAL语句要求用二项分布计算精确p值。 */ proc freq data=samp.gpa; tables sex / binomial(level='Male' p=0.5); exact binomial; run; data scount; input sex $ count; datalines; Female 145 Male 79 ; run; proc freq data=scount; tables sex / binomial(level='Male' p=0.5); exact binomial; weight count; run; /* 两总体比例的假设检验: 检验吸烟人群的患病率是否高于不吸烟人群: */ data bron; input smoke $ bron $ numcell; label smoke='吸烟' bron='慢性支气管炎'; cards; 吸烟 患病 43 吸烟 未患 162 不吸烟 患病 13 不吸烟 未患 121 ; proc freq data=bron; tables smoke*bron / nopct norow nocol fisher; weight numcell; run; /******************************************/ /* 回归分析例子 */ /******************************************/ /* 残差图的几种情况 */ /* 非线性因素 */ data samp.regdiag1; b0=100; b1=2; b2=0.5; do x=-10 to 10; y = b0 + b1*x + b2*x*x + 2*normal(111); output; end; keep x y; run; /* 忽略重要变量 */ data samp.regdiag2; b0f=100; b0m=200; b1=2; s='F'; do x=10 to 30 by 0.3; y = b0f + b1*x + 5*normal(111); output; end; s='M'; do x=10 to 30 by 0.3; y = b0m + b1*x + 5*normal(111); output; end; keep s x y; run; /* 离群值 */ data samp.regdiag3; b0=100; b1=2; do x=10 to 30 by 0.3; y = b0 + b1*x + 5*normal(111); output; end; x=12; y=100; output; x=25; y=180; output; keep x 
y; /* completes the KEEP statement begun at the end of the previous source line */
run;

/* Heteroscedasticity: the noise term 0.25*x*normal(111) scales with x. */
data samp.regdiag4;
  b0=100; b1=2; b2=0.5;
  do x=10 to 30 by 0.3;
    y = b0 + b1*x + 0.25*x*normal(111);
    output;
  end;
  keep x y;
run;

/* Serial autocorrelation: each error is 0.8 times the previous error plus
   fresh normal noise; i counts the generated observations. */
data samp.regdiag5;
  b0=100; b1=2; b2=0.5;
  i=0;
  e = 0; e1 = e;
  do x=10 to 30 by 0.3;
    i+1;
    e = 0.8*e1 + normal(111);
    e1 = e;                     /* remember this error for the next iteration */
    y = b0 + b1*x + e;
    output;
  end;
  keep i x y;
run;

/* Collinearity: x2 is 0.5*x1 plus a small noise term. */
data samp.regdiag6;
  b0 = 100; b1 = 1; b2 = 1;
  do x1=1 to 20;
    x2 = 0.5*x1 + 0.1*normal(0);
    y = b0 + b1*x1 + b2*x2 + 2*normal(0);
    output;
  end;
run;

/* Omitting an important predictor: the two sex groups follow different
   lines over different x ranges. */
data samp.mireg;
  sex='F';
  do x=10 to 20;
    y = 100 - 2*x + normal(0);
    output;
  end;
  sex='M';
  do x=31 to 40;
    y = 200 -x + normal(0);
    output;
  end;
run;

/* Scatter plot and a single pooled regression that ignores sex
   (both steps read the most recently created data set). */
proc gplot;
  symbol i=none v=star;
  plot y*x;
run;
proc reg;
  model y=x;
  plot y*x / pred;
run;
quit;

/* The proper approach: fit the regression separately for each sex.
   Fixed: the data set was created as SAMP.MIREG, but the original sorted
   and read "mireg" (WORK.MIREG), which no step shown here creates.  The sort
   now reads SAMP.MIREG and writes the sorted copy to WORK.MIREG, so the
   steps below that rely on "mireg" or on the last-created data set work,
   and the permanent data set is left untouched. */
proc sort data=samp.mireg out=mireg;
  by sex;
run;
proc reg;
  model y=x;
  by sex;
run;

/* Or alternatively: one model with a sex indicator s and the interaction xs. */
data mireg2;
  set mireg;
  if sex='M' then s=1;
  else s=0;
  xs = x*s;
run;
proc reg;
  model y = x s xs;
  plot residual.*predicted.;
run;
quit;

/* Test whether the male and female models are identical:
   joint test of the coefficients of s and xs. */
proc reg;
  model y = x s xs;
  test1: test s=0, xs=0;
run;
quit;

/* Or use PROC MIXED: */

/* Regression of weight on height and age. */
proc reg data=samp.class;
  var weight height age;
  model weight=height age;
run;
  model weight=height age / selection=stepwise;
run;
  print cli;
run;
  print clm;
run;
  plot weight * height / conf95;
run;
  plot residual. * predicted.;
run;
  /* This PLOT statement is split by the source line break; the rest of the
     statement follows on the next source line. */
  plot rstudent.
* obs.; run; quit; /* 回归的ODS图形 */ ods graphics on; proc reg data=samp.class; model weight=height; quit; ods graphics off; /* 联合检验 */ proc reg data=samp.fitness; model oxygen = age weight runtime rstpulse runpulse maxpulse; test1: test rstpulse=runpulse, rstpulse=maxpulse; test2: test weight=0, rstpulse=0; quit; /* 美国人口增长数据: 二次多项式回归 */ data USPopulation; input Population @@; retain Year 1780; Year=Year+10; YearSq=Year*Year; Population=Population/1000; datalines; 3929 5308 7239 9638 12866 17069 23191 31443 39818 50155 62947 75994 91972 105710 122775 131669 151325 179323 203211 226542 248710 281422 ; run; symbol1 c=blue; proc reg data=USPopulation; var YearSq; model Population=Year / r cli clm vif; plot r.*p. / cframe=ligr; run; add YearSq; print; plot / cframe=ligr; run; quit; /* 中心化自变量以克服高次项和交叉项引起的共线性 */ data new; set USPopulation; Year = Year - 1880; YearSq = Year*Year; run; proc reg data=new outest=beta; model Population = Year YearSq / collin; quit; proc print data=beta;run; /* dwProb 选项检验序列相关性*/ proc reg data=new; model Population = Year YearSq / dwProb; quit; /* 用保存的回归系数作回归曲线数据集,与原始数据集纵向合并,作overlay图形 */ data gr; set beta(rename=(year=beta1 yearsq=beta2)); population=.; do year=1790 to 2000 by 1; x = year - 1880; fitted = intercept + beta1*x + beta2*x**2; output; end; drop x; run; data gr2; set USPopulation gr; run; proc gplot data=gr2; symbol1 i=none v=square c=black r=1; symbol2 i=join v=none c=red w=3; plot population*year=1 fitted*year=2 /overlay; run; quit; /* 回归分析: 身体健康指标, 自变量选择 */ *-------------------Data on Physical Fitness-------------------* | These measurements were made on men involved in a physical | | fitness course at N.C.State Univ. The variables are Age | | (years), Weight (kg), Oxygen intake rate (ml per kg body | | weight per minute), time to run 1.5 miles (minutes), heart | | rate while resting, heart rate while running (same time | | Oxygen rate measured), and maximum heart rate recorded while | | running. 
| | ***Certain values of MaxPulse were changed for this analysis.| *--------------------------------------------------------------*; data fitness; input Age Weight Oxygen RunTime RestPulse RunPulse MaxPulse @@; datalines; 44 89.47 44.609 11.37 62 178 182 40 75.07 45.313 10.07 62 185 185 44 85.84 54.297 8.65 45 156 168 42 68.15 59.571 8.17 40 166 172 38 89.02 49.874 9.22 55 178 180 47 77.45 44.811 11.63 58 176 176 40 75.98 45.681 11.95 70 176 180 43 81.19 49.091 10.85 64 162 170 44 81.42 39.442 13.08 63 174 176 38 81.87 60.055 8.63 48 170 186 44 73.03 50.541 10.13 45 168 168 45 87.66 37.388 14.03 56 186 192 45 66.45 44.754 11.12 51 176 176 47 79.15 47.273 10.60 47 162 164 54 83.12 51.855 10.33 50 166 170 49 81.42 49.156 8.95 44 180 185 51 69.63 40.836 10.95 57 168 172 51 77.91 46.672 10.00 48 162 168 48 91.63 46.774 10.25 48 162 164 49 73.37 50.388 10.08 67 168 168 57 73.37 39.407 12.63 58 174 176 54 79.38 46.080 11.17 62 156 165 52 76.32 45.441 9.63 48 164 166 50 70.87 54.625 8.92 48 146 155 51 67.25 45.118 11.08 48 172 172 54 91.63 39.203 12.88 44 168 172 51 73.71 45.790 10.47 59 186 188 57 59.08 50.545 9.93 49 148 155 49 76.32 48.673 9.40 56 186 188 48 61.24 47.920 11.50 52 170 176 52 82.78 47.467 10.50 53 170 172 ; proc reg data=fitness; model Oxygen=Age Weight RunTime RunPulse RestPulse MaxPulse / selection=forward; model Oxygen=Age Weight RunTime RunPulse RestPulse MaxPulse / selection=backward; model Oxygen=Age Weight RunTime RunPulse RestPulse MaxPulse / selection=maxr; run; /* 回归中简单图形 */ data fitness; set fitness; label Age ='age(years)' Weight ='weight(kg)' Oxygen ='oxygen uptake(ml/kg/min)' RunTime ='1.5 mile time(min)' RestPulse='rest pulse' RunPulse ='running pulse' MaxPulse ='maximum running pulse'; run; proc reg data=fitness; model Oxygen=RunTime; plot Oxygen*RunTime / cframe=ligr; run; /* 在回归中用图形参数控制图形样式 */ goptions ctitle=black htitle=3.5pct ftitle=swiss ctext =magenta htext =3.0pct ftext =swiss cback =ligr border; symbol1 v=circle c=red h=1 w=2; 
title1 'Selection=Rsquare'; title2 'plot Rsquare versus the number of parameters P in ' 'each model'; proc reg data=fitness; model Oxygen=Age Weight RunTime RunPulse RestPulse MaxPulse / selection=rsquare noprint; plot rsq.*np. / aic bic edf gmsep jp np pc sbc sp haxis=2 to 7 by 1 caxis=red cframe=white ctext=blue modellab='Full Model' modelht=2.4 statht=2.4; run; /* 回归的QQ图和PP图 */ data annote1; length function color $8; retain ysys xsys '2' color 'black'; function='move'; x=0; y=0; output; function='draw'; x=1; y=1; output; run; symbol1 c=blue; proc reg data=fitness; title 'PP Plot'; model Oxygen=RunTime / noprint; plot npp.*r. / annotate=annote1 nostat cframe=ligr modellab="'Best' Two-Parameter Model:"; run; title 'QQ Plot'; plot r.*nqq. / noline mse cframe=ligr modellab="'Best' Two-Parameter Model:"; run; quit; /* 回归预测和置信限图 */ legend1 position=(bottom left inside) across=1 cborder=red offset=(0,0) shape=symbol(3,1) label=none value=(height=.8); title 'Prediction Intervals'; symbol1 c=yellow v=- h=1; symbol2 c=red; symbol3 c=blue; symbol4 c=blue; proc reg data=fitness; model Oxygen=RunTime / noprint; plot Oxygen*RunTime / pred nostat mse aic bic caxis=red ctext=blue cframe=ligr legend=legend1 modellab=' '; run; quit; /* 体重对身高和年龄的回归,分男女生分别估计 */ *------------Data on Age, Weight, and Height of Children-------* | Age (months), height (inches), and weight (pounds) were | | recorded for a group of school children. | | From Lewis and Taylor (1967). 
| *--------------------------------------------------------------*; data htwt; input sex $ age :3.1 height weight @@; datalines; f 143 56.3 85.0 f 155 62.3 105.0 f 153 63.3 108.0 f 161 59.0 92.0 f 191 62.5 112.5 f 171 62.5 112.0 f 185 59.0 104.0 f 142 56.5 69.0 f 160 62.0 94.5 f 140 53.8 68.5 f 139 61.5 104.0 f 178 61.5 103.5 f 157 64.5 123.5 f 149 58.3 93.0 f 143 51.3 50.5 f 145 58.8 89.0 f 191 65.3 107.0 f 150 59.5 78.5 f 147 61.3 115.0 f 180 63.3 114.0 f 141 61.8 85.0 f 140 53.5 81.0 f 164 58.0 83.5 f 176 61.3 112.0 f 185 63.3 101.0 f 166 61.5 103.5 f 175 60.8 93.5 f 180 59.0 112.0 f 210 65.5 140.0 f 146 56.3 83.5 f 170 64.3 90.0 f 162 58.0 84.0 f 149 64.3 110.5 f 139 57.5 96.0 f 186 57.8 95.0 f 197 61.5 121.0 f 169 62.3 99.5 f 177 61.8 142.5 f 185 65.3 118.0 f 182 58.3 104.5 f 173 62.8 102.5 f 166 59.3 89.5 f 168 61.5 95.0 f 169 62.0 98.5 f 150 61.3 94.0 f 184 62.3 108.0 f 139 52.8 63.5 f 147 59.8 84.5 f 144 59.5 93.5 f 177 61.3 112.0 f 178 63.5 148.5 f 197 64.8 112.0 f 146 60.0 109.0 f 145 59.0 91.5 f 147 55.8 75.0 f 145 57.8 84.0 f 155 61.3 107.0 f 167 62.3 92.5 f 183 64.3 109.5 f 143 55.5 84.0 f 183 64.5 102.5 f 185 60.0 106.0 f 148 56.3 77.0 f 147 58.3 111.5 f 154 60.0 114.0 f 156 54.5 75.0 f 144 55.8 73.5 f 154 62.8 93.5 f 152 60.5 105.0 f 191 63.3 113.5 f 190 66.8 140.0 f 140 60.0 77.0 f 148 60.5 84.5 f 189 64.3 113.5 f 143 58.3 77.5 f 178 66.5 117.5 f 164 65.3 98.0 f 157 60.5 112.0 f 147 59.5 101.0 f 148 59.0 95.0 f 177 61.3 81.0 f 171 61.5 91.0 f 172 64.8 142.0 f 190 56.8 98.5 f 183 66.5 112.0 f 143 61.5 116.5 f 179 63.0 98.5 f 186 57.0 83.5 f 182 65.5 133.0 f 182 62.0 91.5 f 142 56.0 72.5 f 165 61.3 106.5 f 165 55.5 67.0 f 154 61.0 122.5 f 150 54.5 74.0 f 155 66.0 144.5 f 163 56.5 84.0 f 141 56.0 72.5 f 147 51.5 64.0 f 210 62.0 116.0 f 171 63.0 84.0 f 167 61.0 93.5 f 182 64.0 111.5 f 144 61.0 92.0 f 193 59.8 115.0 f 141 61.3 85.0 f 164 63.3 108.0 f 186 63.5 108.0 f 169 61.5 85.0 f 175 60.3 86.0 f 180 61.3 110.5 m 165 64.8 98.0 m 157 60.5 105.0 m 144 
57.3 76.5 m 150 59.5 84.0 m 150 60.8 128.0 m 139 60.5 87.0 m 189 67.0 128.0 m 183 64.8 111.0 m 147 50.5 79.0 m 146 57.5 90.0 m 160 60.5 84.0 m 156 61.8 112.0 m 173 61.3 93.0 m 151 66.3 117.0 m 141 53.3 84.0 m 150 59.0 99.5 m 164 57.8 95.0 m 153 60.0 84.0 m 206 68.3 134.0 m 250 67.5 171.5 m 176 63.8 98.5 m 176 65.0 118.5 m 140 59.5 94.5 m 185 66.0 105.0 m 180 61.8 104.0 m 146 57.3 83.0 m 183 66.0 105.5 m 140 56.5 84.0 m 151 58.3 86.0 m 151 61.0 81.0 m 144 62.8 94.0 m 160 59.3 78.5 m 178 67.3 119.5 m 193 66.3 133.0 m 162 64.5 119.0 m 164 60.5 95.0 m 186 66.0 112.0 m 143 57.5 75.0 m 175 64.0 92.0 m 175 68.0 112.0 m 175 63.5 98.5 m 173 69.0 112.5 m 170 63.8 112.5 m 174 66.0 108.0 m 164 63.5 108.0 m 144 59.5 88.0 m 156 66.3 106.0 m 149 57.0 92.0 m 144 60.0 117.5 m 147 57.0 84.0 m 188 67.3 112.0 m 169 62.0 100.0 m 172 65.0 112.0 m 150 59.5 84.0 m 193 67.8 127.5 m 157 58.0 80.5 m 168 60.0 93.5 m 140 58.5 86.5 m 156 58.3 92.5 m 156 61.5 108.5 m 158 65.0 121.0 m 184 66.5 112.0 m 156 68.5 114.0 m 144 57.0 84.0 m 176 61.5 81.0 m 168 66.5 111.5 m 149 52.5 81.0 m 142 55.0 70.0 m 188 71.0 140.0 m 203 66.5 117.0 m 142 58.8 84.0 m 189 66.3 112.0 m 188 65.8 150.5 m 200 71.0 147.0 m 152 59.5 105.0 m 174 69.8 119.5 m 166 62.5 84.0 m 145 56.5 91.0 m 143 57.5 101.0 m 163 65.3 117.5 m 166 67.3 121.0 m 182 67.0 133.0 m 173 66.0 112.0 m 155 61.8 91.5 m 162 60.0 105.0 m 177 63.0 111.0 m 177 60.5 112.0 m 175 65.5 114.0 m 166 62.0 91.0 m 150 59.0 98.0 m 150 61.8 118.0 m 188 63.3 115.5 m 163 66.0 112.0 m 171 61.8 112.0 m 162 63.0 91.0 m 141 57.5 85.0 m 174 63.0 112.0 m 142 56.0 87.5 m 148 60.5 118.0 m 140 56.8 83.5 m 160 64.0 116.0 m 144 60.0 89.0 m 206 69.5 171.5 m 159 63.3 112.0 m 149 56.3 72.0 m 193 72.0 150.0 m 194 65.3 134.5 m 152 60.8 97.0 m 146 55.0 71.5 m 139 55.0 73.5 m 186 66.5 112.0 m 161 56.8 75.0 m 153 64.8 128.0 m 196 64.5 98.0 m 164 58.0 84.0 m 159 62.8 99.0 m 178 63.8 112.0 m 153 57.8 79.5 m 155 57.3 80.5 m 178 63.5 102.5 m 142 55.0 76.0 m 164 66.5 112.0 m 189 65.0 114.0 m 164 
61.5 140.0 m 167 62.0 107.5 m 151 59.3 87.0 ; run; title '----- Data on age, weight, and height of children ------'; proc reg outest=est1 outsscp=sscp1 rsquare; by sex; eq1: model weight=height; eq2: model weight=height age; proc print data=sscp1; title2 'SSCP type data set'; proc print data=est1; title2 'EST type data set'; run; /* 带有示性变量(二值分类变量)自变量的回归。 * 因变量为保险公司采纳新险种的时间。自变量为公司规模和种类,种类是0,1变量, * 0表示mutual fund类型,1表示stock类型。 * 种类不同有不同的截距项和斜率项,不同的斜率项用数据步生成的sizetype变量表示。 */ title 'Regression With Quantitative and Qualitative Variables'; data insurance; input time size type @@; sizetype=size*type; datalines; 17 151 0 26 92 0 21 175 0 30 31 0 22 104 0 0 277 0 12 210 0 19 120 0 4 290 0 16 238 0 28 164 1 15 272 1 11 295 1 38 68 1 31 85 1 21 224 1 20 166 1 13 305 1 30 124 1 14 246 1 ; run; proc reg data=insurance; model time = size type sizetype; run; delete sizetype; print; run; output out=out r=r p=p; run; quit; symbol1 v='0' c=blue f=swissb; symbol2 v='1' c=yellow f=swissb; axis1 label=(angle=90); proc gplot data=out; plot r*p=type / nolegend vaxis=axis1 cframe=ligr; plot p*size=type / nolegend vaxis=axis1 cframe=ligr; run; /******************************************/ /* 方差分析例子 */ /******************************************/ proc anova data=samp.veneer; class brand; model wear=brand; run; quit; proc npar1way data=samp.veneer wilcoxon; class brand; var wear; run; proc anova data=samp.veneer; class brand; model wear=brand; means brand; run; means brand / t; run; means brand / bon; run; means brand/ regwq; run; data rubber; input A B STREN; cards; 1 1 31 1 1 33 1 2 34 1 2 36 1 3 35 1 3 36 1 4 39 1 4 38 2 1 33 …………… ; run; **************************************************; data rubber; do a=1 to 3; do b=1 to 4; do r=1 to 2; input stren @@; output; end; end; end; cards; 31 33 34 36 35 36 39 38 33 34 36 37 37 39 38 41 35 37 37 38 39 40 42 44 ; run; **************************************************; proc anova data=rubber; class a b; model stren = a b a*b; run; 
**************************************************;
/* Two-way ANOVA of the rubber data with main effects only (no a*b term),
   followed by the level means of a and b. */
proc anova data=rubber;
  class a b;
  model stren = a b;
run;
  means a b;
run;
**************************************************;

/* Experiment with five factors (temp, time, conc, manu, mix), each recorded
   at levels 1 and 2, and the response prod; one run per data line. */
data exp;
  input temp time conc manu mix prod;
  cards;
1 1 1 1 1 65
1 1 1 2 2 74
1 2 2 1 2 71
1 2 2 2 1 73
2 1 2 1 2 70
2 1 2 2 1 73
2 2 1 1 1 62
2 2 1 2 2 69
;
run;

/* Main-effects ANOVA on all five factors (temp--mix is the variable range
   temp through mix) with pairwise t comparisons of the level means. */
proc anova data=exp;
  class temp time conc manu mix;
  model prod = temp--mix;
  means temp--mix / t;
run;

/* One-sample proportion test (normal approximation).
   Parameters:
     n  - sample size
     n1 - count used for the sample proportion (n1/n)
     p0 - hypothesized proportion
   Prints n, the sample proportion, p0, the Z statistic and the two-sided
   p-value to the print file.  The one-sided variants are kept commented out.
   Block comments are used inside the macro instead of statement-style
   "*...;" comments, which the macro processor treats as ordinary text. */
%MACRO percentzt(n,n1,p0);
data _null_;
  file print;
  p0 = &p0.;
  n = &n.;
  n1 = &n1.;
  xbar = n1/n;
  Z = (xbar - p0)/sqrt(p0 * (1-p0)/n);
  ptwosided = 2*(1 - probnorm(abs(Z)));
  /* prightsided = 1 - probnorm(Z); */
  /* pleftsided = probnorm(Z); */
  put '===== Test for percent =====';
  put 'n = ' n ' p =' xbar;
  put 'p0 = ' p0;
  put 'Z = ' Z;
  put 'Pr > |Z|: ' ptwosided pvalue.;
  /* put 'Pr > Z: ' prightsided pvalue.; */
  /* put 'Pr < Z: ' pleftsided pvalue.; */
run;
%MEND percentzt;
%percentzt(100,5,0.08);

/* Single-population proportion test: binomial test of p=0.5 for the second
   level of sex, with an exact p-value. */
proc freq data=samp.class;
  tables sex / binomial(p=0.5 level=2);
  exact binomial;
run;

/* The same test from aggregated counts, using a WEIGHT variable. */
data aa;
  input sex $ nums;
  cards;
F 9
M 10
;
run;
proc freq data=aa;
  tables sex / binomial(p=0.5 level=2);
  exact binomial;
  weight nums;
run;

/* Counts (times) for day codes 1-6; chi-square test against equal
   proportions of 1/6 each. */
data accident;
  input day times @@;
  cards;
1 9 2 10 3 11 4 8 5 13 6 12
;
run;
proc freq data=accident;
  tables day / chisq
         testp=(0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667);
  weight times;
run;

/* Macro version of the equal-proportion test; it computes 1/6 with
   %SYSEVALF instead of typing the rounded constant six times.
   Fixed: the original read the counts with an in-stream CARDS block inside
   the macro, but in-stream data (CARDS/DATALINES) is not allowed in a macro
   definition.  The same six (day, times) pairs are now generated from a
   temporary array, so the macro still (re)creates WORK.ACCIDENT by itself.
   The macro is only defined in this part of the file, not invoked. */
%macro check_accident;
data accident;
  array _times{6} _temporary_ (9 10 11 8 13 12);
  do day = 1 to 6;
    times = _times{day};
    output;
  end;
run;
%let onesix = %sysevalf(1/6);
proc freq data=accident;
  tables day / chisq
         testp=(&onesix &onesix &onesix &onesix &onesix &onesix);
  weight times;
run;
%mend check_accident;
**************************************************;

/* Student-level records: student number, sex and origin.
   NOTE: the last data line is a placeholder in the source that stands for the
   remaining student records; replace it with real records before running. */
data class;
  input sno sex $ from $;
  label sex='性别' from='来源';
  cards;
1 男 本地
2 女 外地
3 男 外地
…………/* 所有学生的记录 */
;
**************************************************;

/* Two-way frequency table of origin by sex. */
proc freq data=class;
  tables from * sex;
run;
**************************************************;
/* ------------------------------------------------------------------ */
/* Contingency-table examples.                                         */
/* Each data set stores one row per table cell; NUMCELL is the cell    */
/* count and is used as the WEIGHT variable in PROC FREQ.              */
/* In-stream data must start on the line after CARDS, one record per   */
/* line, so the records below are laid out that way.                   */
/* ------------------------------------------------------------------ */

/* Class composition: origin (FROM) by SEX, with cell counts. */
data classt;
  input from $ sex $ numcell;
  label sex='性别' from='来源';
  cards;
本地 男 4
本地 女 6
外地 男 14
外地 女 7
;
run;

/* Two-way table with the default percentages. */
proc freq data=classt;
  tables from * sex;
  weight numcell;
run;

/* Same table, counts only (overall, row and column percentages suppressed). */
proc freq data=classt;
  tables from * sex / nopct norow nocol;
  weight numcell;
run;

/* Smoking status by chronic-bronchitis status, with cell counts. */
data bron;
  /* Explicit length: the default of 8 bytes for list input can truncate */
  /* three-character values such as the first field of the last two rows */
  /* when the session uses a multi-byte encoding like UTF-8.             */
  length smoke $ 12 bron $ 12;
  input smoke $ bron $ numcell;
  label smoke='吸烟' bron='慢性支气管炎';
  cards;
吸烟 患病 43
吸烟 未患 162
不吸烟 患病 13
不吸烟 未患 121
;
run;

/* Chi-square test of independence, printing expected cell counts. */
proc freq data=bron;
  tables smoke*bron / nopct norow nocol chisq expected;
  weight numcell;
run;

/* Two-sample proportion test, macro.
 *   n1, s1 : size of sample 1 and number of "successes" in it
 *   n2, s2 : size of sample 2 and number of "successes" in it
 * Writes the sample proportions and three p-values to the PRINT file.
 * NOTE(review): the two-sided p-value is based on Z2s (pooled standard
 * error) while both one-sided p-values are based on Z1s (unpooled
 * standard error). This is kept exactly as written; confirm that the
 * mix is intended.
 */
%MACRO percent2z(n1,s1,n2,s2);
data _null_;
  file print;
  n1=&n1;
  s1=&s1;
  n2=&n2;
  s2=&s2;
  /* Pooled and per-sample proportions. */
  hatp  = (s1+s2)/(n1+n2);
  hatp1 = s1/n1;
  hatp2 = s2/n2;
  /* Z statistic with pooled standard error. */
  Z2s = (hatp1 - hatp2) / sqrt(hatp*(1-hatp)*(1/n1 + 1/n2));
  /* Z statistic with unpooled standard error. */
  Z1s = (hatp1 - hatp2) / sqrt(hatp1*(1-hatp1)/n1 +hatp2*(1-hatp2)/n2);
  ptwosided   = 2*(1 - probnorm(abs(Z2s)));
  prightsided = 1 - probnorm(Z1s);
  pleftsided  = probnorm(Z1s);
  put '===== Test for percent =====';
  put 'n1 = ' n1 ' s1 =' s1 ' p1=' hatp1;
  put 'n2 = ' n2 ' s2 =' s2 ' p2=' hatp2;
  put 'Pr > |Z|: ' ptwosided pvalue.;
  put 'Pr > Z: ' prightsided pvalue.;
  put 'Pr < Z: ' pleftsided pvalue.;
run;
%MEND percent2z;

%percent2z(100,10,100,8);

/* Herd size by disease severity for cows, with cell counts. */
data cows;
  input herdsize disease numcell;
  label herdsize='牛群大小' disease='患病程度';
  cards;
1 0 9
1 1 5
1 2 9
2 0 18
2 1 4
2 2 19
3 0 11
3 1 88
3 2 136
;
run;

/* Association measures and chi-square tests for the cows table. */
proc freq data=cows;
  tables herdsize*disease / measures chisq nopercent nocol;
  weight numcell;
  title '奶牛疾病数据分析';
run;

/* Clear the cows title so it does not appear on the unrelated */
/* temperature analysis below.                                 */
title;

/* ------------------------------------------------------------------ */
/* January and July temperatures by city.                              */
/* CITY is read from columns 1-15; the two temperatures follow as      */
/* list input, so every value must start after column 15.              */
/* ------------------------------------------------------------------ */
DATA TEMPERAT;
  INPUT CITY $1-15 JANUARY JULY;
  CARDS;
MOBILE          10.7 27.6
PHOENIX         10.7 32.9
LITTLE ROCK     4.2 27.4
SACRAMENTO      7.3 24.0
DENVER          -1.2 22.8
HARTFORD        -4.0 22.6
WILMINGTON      0.0 24.3
WASHINGTON DC   2.0 25.9
JACKSONVILLE    12.6 27.2
MIAMI           19.6 27.9
ATLANTA         5.8 25.6
BOISE           -1.7 23.6
CHICAGO         -5.1 22.2
PEORIA          -4.6 23.9
INDIANAPOLIS    -2.3 23.9
DES MOINES      -7.0 23.9
WICHITA         -0.4 27.1
LOUISVILLE      0.7 24.9
NEW ORLEANS     11.6 27.7
PORTLAND, MAINE -5.8 20.0
BALTIMORE       0.8 24.8
BOSTON          -1.6 22.9
DETROIT         -3.6 22.9
SAULT STE MARIE -9.9 17.7
DULUTH          -13.1 18.7
MINNEAPOLIS     -11.0 22.2
JACKSON         8.4 27.6
KANSAS CITY     -2.3 26.0
ST LOUIS        -0.4 25.9
GREAT FALLS     -6.4 20.7
OMAHA           -5.2 25.1
RENO            -0.1 20.7
CONCORD         -6.3 20.9
ATLANTIC CITY   0.4 23.9
ALBUQUERQUE     1.8 25.9
ALBANY          -5.8 22.2
BUFFALO         -4.6 21.2
NEW YORK        0.1 24.8
CHARLOTTE       5.6 25.8
RALEIGH         4.7 25.3
BISMARCK        -13.2 21.6
CINCINNATI      -0.5 24.2
CLEVELAND       -2.8 21.9
COLUMBUS        -2.0 23.1
OKLAHOMA CITY   2.7 27.5
PORTLAND, OREG  3.4 19.5
PHILADELPHIA    0.2 24.9
PITTSBURGH      -2.2 22.2
PROVIDENCE      -2.0 22.3
COLUMBIA        7.4 27.3
SIOUX FALLS     -9.9 22.9
MEMPHIS         4.7 26.4
NASHVILLE       3.5 26.4
DALLAS          7.1 29.3
EL PASO         6.4 27.9
HOUSTON         11.2 28.5
SALT LAKE CITY  -2.2 24.8
BURLINGTON      -8.4 21.0
NORFOLK         4.7 25.7
RICHMOND        3.1 25.5
SPOKANE         -3.7 20.9
CHARLESTON, WV  1.4 23.9
MILWAUKEE       -7.0 21.1
CHEYENNE        -3.0 20.6
;
RUN;

/* Principal components of the covariance matrix of the two */
/* temperature variables; scores are written to PRIN.       */
PROC PRINCOMP DATA=TEMPERAT COV OUT=PRIN;
  VAR JULY JANUARY;
RUN;

/* ------------------------------------------------------------------ */
/* Crime rates by state.                                               */
/* STATE is read from columns 1-15; the seven rates follow as list     */
/* input.                                                              */
/* ------------------------------------------------------------------ */
DATA CRIME;
  TITLE '各州每十万人的犯罪率';
  INPUT STATE $1-15 MURDER RAPE ROBBERY ASSAULT BURGLARY LARCENY AUTO;
  CARDS;
ALABAMA         14.2 25.2 96.8 278.3 1135.5 1881.9 280.7
ALASKA          10.8 51.6 96.8 284.0 1331.7 3369.8 753.3
ARIZONA         9.5 34.2 138.2 312.3 2346.1 4467.4 439.5
ARKANSAS        8.8 27.6 83.2 203.4 972.6 1862.1 183.4
CALIFORNIA      11.5 49.4 287.0 358.0 2139.4 3499.8 663.5
COLORADO        6.3 42.0 170.7 292.9 1935.2 3903.2 477.1
CONNECTICUT     4.2 16.8 129.5 131.8 1346.0 2620.7 593.2
DELAWARE        6.0 24.9 157.0 194.2 1682.6 3678.4 467.0
FLORIDA         10.2 39.6 187.9 449.1 1859.9 3840.5 351.4
GEORGIA         11.7 31.1 140.5 256.5 1351.1 2170.2 297.9
HAWAII          7.2 25.5 128.0 64.1 1911.5 3920.4 489.4
IDAHO           5.5 19.4 39.6 172.5 1050.8 2599.6 237.6
ILLINOIS        9.9 21.8 211.3 209.0 1085.0 2828.5 528.6
INDIANA         7.4 26.5 123.2 153.5 1086.2 2498.7 377.4
IOWA            2.3 10.6 41.2 89.8 812.5 2685.1 219.9
KANSAS          6.6 22.0 100.7 180.5 1270.4 2739.3 244.3
KENTUCKY        10.1 19.1 81.1 123.3 872.2 1662.1 245.4
LOUISIANA       15.5 30.9 142.9 335.5 1165.5 2469.9 337.7
MAINE           2.4 13.5 38.7 170.0 1253.1 2350.7 246.9
MARYLAND        8.0 34.8 292.1 358.9 1400.0 3177.7 428.5
MASSACHUSETTS   3.1 20.8 169.1 231.6 1532.2 2311.3 1140.1
MICHIGAN        9.3 38.9 261.9 274.6 1522.7 3159.0 545.5
MINNESOTA       2.7 19.5 85.9 85.8 1134.7 2559.3 343.1
MISSISSIPPI     14.3 19.6 65.7 189.1 915.6 1239.9 144.4
MISSOURI        9.6 28.3 189.0 233.5 1318.3 2424.2 378.4
MONTANA         5.4 16.7 39.2 156.8 804.9 2773.2 309.2
NEBRASKA        3.9 18.1 64.7 112.7 760.0 2316.1 249.1
NEVADA          15.8 49.1 323.1 355.0 2453.1 4212.6 559.2
NEW HAMPSHIRE   3.2 10.7 23.2 76.0 1041.7 2343.9 293.4
NEW JERSEY      5.6 21.0 180.4 185.1 1435.8 2774.5 511.5
NEW MEXICO      8.8 39.1 109.6 343.4 1418.7 3008.6 259.5
NEW YORK        10.7 29.4 472.6 319.1 1728.0 2782.0 745.8
NORTH CAROLINA  10.6 17.0 61.3 318.3 1154.1 2037.8 192.1
NORTH DAKOTA    0.9 9.0 13.3 43.8 446.1 1843.0 144.7
OHIO            7.8 27.3 190.5 181.1 1216.0 2696.8 400.4
OKLAHOMA        8.6 29.2 73.8 205.0 1288.2 2228.1 326.8
OREGON          4.9 39.9 124.1 286.9 1636.4 3506.1 388.9
PENNSYLVANIA    5.6 19.0 130.3 128.0 877.5 1624.1 333.2
RHODE ISLAND    3.6 10.5 86.5 201.0 1489.5 2844.1 791.4
SOUTH CAROLINA  11.9 33.0 105.9 485.3 1613.6 2342.4 245.1
SOUTH DAKOTA    2.0 13.5 17.9 155.7 570.5 1704.4 147.5
TENNESSEE       10.1 29.7 145.8 203.9 1259.7 1776.5 314.0
TEXAS           13.3 33.8 152.4 208.2 1603.1 2988.7 397.6
UTAH            3.5 20.3 68.8 147.3 1171.6 3004.6 334.5
VERMONT         1.4 15.9 30.8 101.2 1348.2 2201.0 265.2
VIRGINIA        9.0 23.3 92.1 165.7 986.2 2521.2 226.7
WASHINGTON      4.3 39.6 106.2 224.8 1605.6 3386.9 360.3
WEST VIRGINIA   6.0 13.2 42.2 90.9 597.4 1341.7 163.3
WISCONSIN       2.8 12.9 52.2 63.7 846.9 2614.2 220.7
WYOMING         5.4 21.9 39.7 173.9 811.6 2772.2 282.0
;
RUN;

/* Principal components of all numeric crime variables (no VAR      */
/* statement, correlation matrix); scores are written to CRIMCOMP.  */
PROC PRINCOMP DATA=CRIME OUT=CRIMCOMP;
RUN;

/* List the states ordered by the first principal component. */
PROC SORT DATA=CRIMCOMP;
  BY PRIN1;
RUN;

PROC PRINT DATA=CRIMCOMP;
  ID STATE;
  VAR PRIN1 PRIN2 MURDER RAPE ROBBERY ASSAULT BURGLARY LARCENY AUTO;
  TITLE2 '各州按第一主分量作为总犯罪率排列';
RUN;

/* List the states ordered by the second principal component. */
PROC SORT DATA=CRIMCOMP;
  BY PRIN2;
RUN;

PROC PRINT DATA=CRIMCOMP;
  ID STATE;
  VAR PRIN1 PRIN2 MURDER RAPE ROBBERY ASSAULT BURGLARY LARCENY AUTO;
  TITLE2 '各州按第二主分量作为金钱犯罪与暴力犯罪对比的排列';
RUN;

/* Font for graphics text in the plots that follow. */
GOPTIONS FTEXT='宋体';
/* Scatter plots of the crime principal-component scores, one plotting */
/* symbol per state. CRIMCOMP is named explicitly instead of relying   */
/* on the most recently created data set.                              */
PROC GPLOT DATA=CRIMCOMP;
  PLOT PRIN2*PRIN1=STATE;
  TITLE2 '前两个主分量的散点图';
RUN;
QUIT;

PROC GPLOT DATA=CRIMCOMP;
  PLOT PRIN3*PRIN1=STATE;
  TITLE2 '第一、三主分量的散点图';
RUN;
QUIT;

/* Clear the crime titles so they do not label the factor analysis below. */
title;

/* Factor analysis of 14 rating items for police officers.
 * 103 officers, 14 rating items; the goal is to look for the
 * underlying factors behind the items.
 * Each data record holds 14 one-digit ratings read with the 1. informat.
 */
options validvarname=any;

data jobratings;
  input ('Communication Skills'n
         'Problem Solving'n
         'Learning Ability'n
         'Judgment Under Pressure'n
         'Observational Skills'n
         'Willingness to Confront Problems'n
         'Interest in People'n
         'Interpersonal Sensitivity'n
         'Desire for Self-Improvement'n
         'Appearance'n
         'Dependability'n
         'Physical Ability'n
         'Integrity'n
         'Overall Rating'n) (1.);
  datalines;
26838853879867
74758876857667
56757863775875
67869777988997
99997798878888
89897899888799
89999889899798
87794798468886
35652335143113
89888879576867
76557899446397
97889998898989
76766677598888
77667676779677
63839932588856
25738811284915
88879966797988
87979877959679
87989975878798
99889988898888
78876765687677
88889888899899
88889988878988
67646577384776
78778788799997
76888866768667
67678665746776
33424476664855
65656765785766
54566676565866
56655566656775
88889988868887
89899999898799
98889999899899
57554776468878
53687777797887
68666716475767
78778889798997
67364767565846
77678865886767
68698955669998
55546866663886
68888999998989
97787888798999
76677899799997
44754687877787
77876678798888
76668778799797
57653634361543
76777745653656
76766665656676
88888888878789
88977888869778
58894888747886
58674565473676
76777767777777
77788878789798
98989987999868
66729911474713
98889976999988
88786856667748
77868887897889
99999986999999
46688587616886
66755778486776
87777788889797
65666656545976
73574488887687
74755556586596
76677778789797
87878746777667
86776955874877
77888767778678
65778787778997
58786887787987
65787766676778
86777875468777
67788877757777
77778967855867
67887876767777
24786585535866
46532343542533
35566766676784
11231214211211
76886588536887
57784788688589
56667766465666
66787778778898
77687998877997
76668888546676
66477987589998
86788976884597
77868765785477
99988888987888
65948933886457
99999877988898
96636736876587
98676887798968
87878877898979
88897888888788
99997899799799
99899899899899
76656399567486
;
run;

/* Principal factor analysis (SMC priors) with varimax rotation on    */
/* all items except the overall rating; scoring coefficients go to    */
/* JOBSTAT.                                                           */
proc factor data=jobratings(drop='Overall Rating'n)
            priors=smc rotate=varimax score outstat=jobstat;
run;

/* Apply the scoring coefficients to get factor scores per officer. */
proc score data=jobratings(drop='Overall Rating'n)
           score=jobstat out=jobscore;
run;

/* Five socio-economic indicators, one record per observation. */
DATA SOCECON;
  TITLE '五个经济指标的分析';
  INPUT POP SCHOOL EMPLOY SERVICES HOUSE;
  CARDS;
5700 12.8 2500 270 25000
1000 10.9 600 10 10000
3400 8.8 1000 10 9000
3800 13.6 1700 140 25000
4000 12.8 1600 140 25000
8200 8.3 2600 60 12000
1200 11.4 400 10 16000
9100 11.5 3300 60 14000
9900 12.5 3400 180 18000
9600 13.7 3600 390 25000
9600 9.6 3300 80 12000
9400 11.4 4000 100 13000
;
RUN;

/* Principal factor analysis, no rotation. */
PROC FACTOR DATA=SOCECON priors=smc;
  TITLE2 '主因子分析';
RUN;

/* Principal factor analysis with promax rotation. */
PROC FACTOR DATA=SOCECON PRIORS=SMC ROTATE=PROMAX REORDER;
  TITLE2 '主因子分析及PROMAX斜交旋转';
RUN;

/* Principal factor analysis with varimax rotation; scoring */
/* coefficients are saved to OUTF.                          */
PROC FACTOR DATA=SOCECON PRIORS=SMC ROTATE=VARIMAX REORDER SCORE OUTSTAT=OUTF;
  TITLE2 '主因子分析及VARIMAX正交旋转';
RUN;

/* Factor scores for each observation, written to OUTS. */
PROC SCORE DATA=SOCECON SCORE=OUTF OUT=OUTS;
  TITLE2 ' VARIMAX正交旋转后的主因子得分';
RUN;

/* Discriminant analysis of remote-sensing data for five crops.
 * CROP is read from columns 1-10 and the four measurements X1-X4
 * follow as list input; XVALUES re-reads columns 11-21 as text so the
 * raw measurements can serve as an ID. The records are therefore
 * column-sensitive: the measurements must start in column 11.
 */
data crops;
  title '五种作物遥感数据的判别分析';
  input crop $ 1-10 x1-x4 xvalues $ 11-21;
  cards;
CORN      16 27 31 33
CORN      15 23 30 30
CORN      16 27 27 26
CORN      18 20 25 23
CORN      15 15 31 32
CORN      15 32 32 15
CORN      12 15 16 73
SOYBEANS  20 23 23 25
SOYBEANS  24 24 25 32
SOYBEANS  21 25 23 24
SOYBEANS  27 45 24 12
SOYBEANS  12 13 15 42
SOYBEANS  22 32 31 43
COTTON    31 32 33 34
COTTON    29 24 26 28
COTTON    34 32 28 45
COTTON    26 25 23 24
COTTON    53 48 75 26
COTTON    34 35 25 78
SUGARBEETS22 23 25 42
SUGARBEETS25 25 24 26
SUGARBEETS34 25 16 52
SUGARBEETS54 23 21 54
SUGARBEETS25 43 32 15
SUGARBEETS26 54 2 54
CLOVER    12 45 32 54
CLOVER    24 58 25 34
CLOVER    87 54 61 21
CLOVER    51 31 31 16
CLOVER    96 48 54 62
CLOVER    31 31 11 11
CLOVER    56 13 13 71
CLOVER    32 13 27 32
CLOVER    36 26 54 32
CLOVER    53 08 06 54
CLOVER    32 32 62 16
;
run;

/* Linear discriminant analysis (pooled covariance, normal theory)   */
/* with priors proportional to group sizes; calibration statistics   */
/* are saved to CROPSTAT.                                            */
proc discrim data=crops outstat=cropstat method=normal pool=yes
             list crossvalidate;
  class crop;
  priors proportional;
  id xvalues;
  var x1-x4;
  title2 '使用线性判别函数';
run;

/* Test records, read with the same column layout as CROPS. */
data test;
  input crop $ 1-10 x1-x4 xvalues $ 11-21;
  cards;
CORN      16 27 31 33
SOYBEANS  21 25 23 24
COTTON    29 24 26 28
SUGARBEETS54 23 21 54
CLOVER    32 32 62 16
;
run;

/* Classify the test records using the saved calibration statistics. */
proc discrim data=cropstat testdata=test testout=tout testlist;
  class crop;
  testclass crop;
  testid xvalues;
  var x1-x4;
  title2 '检验数据的判别';
run;

proc print data=tout;
  title2 '检验数据的判别结果';
run;

/* Clear the crops titles so they do not label the clustering output below. */
title;

/* Cluster analysis of the iris data: Ward's method on the four */
/* measurements, keeping SPECIES for later comparison.          */
proc cluster data=samp.iris method=ward outtree=otree pseudo ccc;
  var petallen petalwid sepallen sepalwid;
  copy species;
run;

/* Draw the tree for the last 30 cluster levels and write the */
/* three-cluster assignment to OCLUST.                         */
proc tree data=otree graphics horizontal nclusters=3 out=oclust;
  copy species;
  where _ncl_ <= 30;
run;

/* Cross-tabulate species against the assigned cluster. */
proc freq data=oclust;
  tables species*cluster / nopct norow nocol;
run;